diff --git a/clang-tools-extra/clang-apply-replacements/CMakeLists.txt b/clang-tools-extra/clang-apply-replacements/CMakeLists.txt index 93198ccbfc406fa..551ded903e88a64 100644 --- a/clang-tools-extra/clang-apply-replacements/CMakeLists.txt +++ b/clang-tools-extra/clang-apply-replacements/CMakeLists.txt @@ -2,7 +2,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangApplyReplacements +add_clang_library(clangApplyReplacements STATIC lib/Tooling/ApplyReplacements.cpp DEPENDS diff --git a/clang-tools-extra/clang-change-namespace/CMakeLists.txt b/clang-tools-extra/clang-change-namespace/CMakeLists.txt index ded91edb8e34f06..62289ad031cfd6c 100644 --- a/clang-tools-extra/clang-change-namespace/CMakeLists.txt +++ b/clang-tools-extra/clang-change-namespace/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangChangeNamespace +add_clang_library(clangChangeNamespace STATIC ChangeNamespace.cpp DEPENDS diff --git a/clang-tools-extra/clang-doc/CMakeLists.txt b/clang-tools-extra/clang-doc/CMakeLists.txt index 975ad8e242e4901..520fe58cbe68e89 100644 --- a/clang-tools-extra/clang-doc/CMakeLists.txt +++ b/clang-tools-extra/clang-doc/CMakeLists.txt @@ -4,7 +4,7 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP ) -add_clang_library(clangDoc +add_clang_library(clangDoc STATIC BitcodeReader.cpp BitcodeWriter.cpp ClangDoc.cpp diff --git a/clang-tools-extra/clang-include-fixer/CMakeLists.txt b/clang-tools-extra/clang-include-fixer/CMakeLists.txt index 94afdcc3c67b6ec..00f2f6976152cc8 100644 --- a/clang-tools-extra/clang-include-fixer/CMakeLists.txt +++ b/clang-tools-extra/clang-include-fixer/CMakeLists.txt @@ -2,7 +2,7 @@ set(LLVM_LINK_COMPONENTS support ) -add_clang_library(clangIncludeFixer +add_clang_library(clangIncludeFixer STATIC IncludeFixer.cpp IncludeFixerContext.cpp InMemorySymbolIndex.cpp diff --git a/clang-tools-extra/clang-include-fixer/find-all-symbols/CMakeLists.txt 
b/clang-tools-extra/clang-include-fixer/find-all-symbols/CMakeLists.txt index 0ed46428b0d63d9..ad4eccfb377ecd1 100644 --- a/clang-tools-extra/clang-include-fixer/find-all-symbols/CMakeLists.txt +++ b/clang-tools-extra/clang-include-fixer/find-all-symbols/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP ) -add_clang_library(findAllSymbols +add_clang_library(findAllSymbols STATIC FindAllSymbols.cpp FindAllSymbolsAction.cpp FindAllMacros.cpp diff --git a/clang-tools-extra/clang-include-fixer/plugin/CMakeLists.txt b/clang-tools-extra/clang-include-fixer/plugin/CMakeLists.txt index db5d5a8324389db..b193a30ad4e1196 100644 --- a/clang-tools-extra/clang-include-fixer/plugin/CMakeLists.txt +++ b/clang-tools-extra/clang-include-fixer/plugin/CMakeLists.txt @@ -1,4 +1,4 @@ -add_clang_library(clangIncludeFixerPlugin +add_clang_library(clangIncludeFixerPlugin STATIC IncludeFixerPlugin.cpp LINK_LIBS diff --git a/clang-tools-extra/clang-move/CMakeLists.txt b/clang-tools-extra/clang-move/CMakeLists.txt index e8aeaedd61d4ddb..1380a111c5c4cb6 100644 --- a/clang-tools-extra/clang-move/CMakeLists.txt +++ b/clang-tools-extra/clang-move/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP ) -add_clang_library(clangMove +add_clang_library(clangMove STATIC Move.cpp HelperDeclRefGraph.cpp diff --git a/clang-tools-extra/clang-query/CMakeLists.txt b/clang-tools-extra/clang-query/CMakeLists.txt index 34f018c4a03f389..b168a3a85815673 100644 --- a/clang-tools-extra/clang-query/CMakeLists.txt +++ b/clang-tools-extra/clang-query/CMakeLists.txt @@ -4,7 +4,7 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP ) -add_clang_library(clangQuery +add_clang_library(clangQuery STATIC Query.cpp QueryParser.cpp diff --git a/clang-tools-extra/clang-reorder-fields/CMakeLists.txt b/clang-tools-extra/clang-reorder-fields/CMakeLists.txt index b82146a71158589..2fdeb65d89767b1 100644 --- a/clang-tools-extra/clang-reorder-fields/CMakeLists.txt +++ 
b/clang-tools-extra/clang-reorder-fields/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS support ) -add_clang_library(clangReorderFields +add_clang_library(clangReorderFields STATIC ReorderFieldsAction.cpp DEPENDS diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt index 430ea4cdbb38e19..83a3236131dc932 100644 --- a/clang-tools-extra/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/CMakeLists.txt @@ -8,7 +8,7 @@ configure_file( ${CMAKE_CURRENT_BINARY_DIR}/clang-tidy-config.h) include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}) -add_clang_library(clangTidy +add_clang_library(clangTidy STATIC ClangTidy.cpp ClangTidyCheck.cpp ClangTidyModule.cpp diff --git a/clang-tools-extra/clang-tidy/abseil/CMakeLists.txt b/clang-tools-extra/clang-tidy/abseil/CMakeLists.txt index 489d732abaa8de0..ca7cc6782f1e6db 100644 --- a/clang-tools-extra/clang-tidy/abseil/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/abseil/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP ) -add_clang_library(clangTidyAbseilModule +add_clang_library(clangTidyAbseilModule STATIC AbseilTidyModule.cpp CleanupCtadCheck.cpp DurationAdditionCheck.cpp diff --git a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt index f885993c3c9e717..5d9e7b3a023bb1b 100644 --- a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS support ) -add_clang_library(clangTidyAlteraModule +add_clang_library(clangTidyAlteraModule STATIC AlteraTidyModule.cpp IdDependentBackwardBranchCheck.cpp KernelNameRestrictionCheck.cpp diff --git a/clang-tools-extra/clang-tidy/android/CMakeLists.txt b/clang-tools-extra/clang-tidy/android/CMakeLists.txt index c33d0daf4e25c05..b461730ef14188f 100644 --- a/clang-tools-extra/clang-tidy/android/CMakeLists.txt +++ 
b/clang-tools-extra/clang-tidy/android/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP ) -add_clang_library(clangTidyAndroidModule +add_clang_library(clangTidyAndroidModule STATIC AndroidTidyModule.cpp CloexecAccept4Check.cpp CloexecAcceptCheck.cpp diff --git a/clang-tools-extra/clang-tidy/boost/CMakeLists.txt b/clang-tools-extra/clang-tidy/boost/CMakeLists.txt index fed3c3ba01c1693..9032771ec2fdcc6 100644 --- a/clang-tools-extra/clang-tidy/boost/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/boost/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP ) -add_clang_library(clangTidyBoostModule +add_clang_library(clangTidyBoostModule STATIC BoostTidyModule.cpp UseRangesCheck.cpp UseToStringCheck.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt index 24fc5f23249c0d7..f0667bbfdd87f7f 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP ) -add_clang_library(clangTidyBugproneModule +add_clang_library(clangTidyBugproneModule STATIC ArgumentCommentCheck.cpp AssertSideEffectCheck.cpp AssignmentInIfConditionCheck.cpp diff --git a/clang-tools-extra/clang-tidy/cert/CMakeLists.txt b/clang-tools-extra/clang-tidy/cert/CMakeLists.txt index 882735c9d1e0de4..e3187b28399c700 100644 --- a/clang-tools-extra/clang-tidy/cert/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/cert/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP ) -add_clang_library(clangTidyCERTModule +add_clang_library(clangTidyCERTModule STATIC CERTTidyModule.cpp CommandProcessorCheck.cpp DefaultOperatorNewAlignmentCheck.cpp diff --git a/clang-tools-extra/clang-tidy/concurrency/CMakeLists.txt b/clang-tools-extra/clang-tidy/concurrency/CMakeLists.txt index 3dab6aaf8aea27b..91c6cedabcc8079 100644 --- a/clang-tools-extra/clang-tidy/concurrency/CMakeLists.txt 
+++ b/clang-tools-extra/clang-tidy/concurrency/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyConcurrencyModule +add_clang_library(clangTidyConcurrencyModule STATIC ConcurrencyTidyModule.cpp MtUnsafeCheck.cpp ThreadCanceltypeAsynchronousCheck.cpp diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt b/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt index eb35bbc6a538fea..07bb89ec7937a05 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyCppCoreGuidelinesModule +add_clang_library(clangTidyCppCoreGuidelinesModule STATIC AvoidCapturingLambdaCoroutinesCheck.cpp AvoidConstOrRefDataMembersCheck.cpp AvoidDoWhileCheck.cpp diff --git a/clang-tools-extra/clang-tidy/darwin/CMakeLists.txt b/clang-tools-extra/clang-tidy/darwin/CMakeLists.txt index 6f6b3607b3ec645..d19e8144bcfa3e7 100644 --- a/clang-tools-extra/clang-tidy/darwin/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/darwin/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyDarwinModule +add_clang_library(clangTidyDarwinModule STATIC AvoidSpinlockCheck.cpp DarwinTidyModule.cpp DispatchOnceNonstaticCheck.cpp diff --git a/clang-tools-extra/clang-tidy/fuchsia/CMakeLists.txt b/clang-tools-extra/clang-tidy/fuchsia/CMakeLists.txt index d0e68bfec47fec0..c12c281bc53211b 100644 --- a/clang-tools-extra/clang-tidy/fuchsia/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/fuchsia/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyFuchsiaModule +add_clang_library(clangTidyFuchsiaModule STATIC DefaultArgumentsCallsCheck.cpp DefaultArgumentsDeclarationsCheck.cpp FuchsiaTidyModule.cpp diff --git a/clang-tools-extra/clang-tidy/google/CMakeLists.txt b/clang-tools-extra/clang-tidy/google/CMakeLists.txt 
index fcba2b1b214adce..2470c089ef7ca3d 100644 --- a/clang-tools-extra/clang-tidy/google/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/google/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyGoogleModule +add_clang_library(clangTidyGoogleModule STATIC AvoidCStyleCastsCheck.cpp AvoidNSObjectNewCheck.cpp AvoidThrowingObjCExceptionCheck.cpp diff --git a/clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt b/clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt index 132fbaccccf8a9c..2f31d168e65c0c2 100644 --- a/clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/hicpp/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyHICPPModule +add_clang_library(clangTidyHICPPModule STATIC ExceptionBaseclassCheck.cpp HICPPTidyModule.cpp IgnoredRemoveResultCheck.cpp diff --git a/clang-tools-extra/clang-tidy/linuxkernel/CMakeLists.txt b/clang-tools-extra/clang-tidy/linuxkernel/CMakeLists.txt index 403589d9475904f..e7e2dcf9a82fe14 100644 --- a/clang-tools-extra/clang-tidy/linuxkernel/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/linuxkernel/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyLinuxKernelModule +add_clang_library(clangTidyLinuxKernelModule STATIC LinuxKernelTidyModule.cpp MustCheckErrsCheck.cpp diff --git a/clang-tools-extra/clang-tidy/llvm/CMakeLists.txt b/clang-tools-extra/clang-tidy/llvm/CMakeLists.txt index b56498bdc8c4b24..79c58a19aedac6c 100644 --- a/clang-tools-extra/clang-tidy/llvm/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/llvm/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyLLVMModule +add_clang_library(clangTidyLLVMModule STATIC HeaderGuardCheck.cpp IncludeOrderCheck.cpp LLVMTidyModule.cpp diff --git a/clang-tools-extra/clang-tidy/llvmlibc/CMakeLists.txt b/clang-tools-extra/clang-tidy/llvmlibc/CMakeLists.txt index b071cfd67dcf421..eaeddf4f628573d 
100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/llvmlibc/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyLLVMLibcModule +add_clang_library(clangTidyLLVMLibcModule STATIC CalleeNamespaceCheck.cpp ImplementationInNamespaceCheck.cpp InlineFunctionDeclCheck.cpp diff --git a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt index 1c1d3b836ea1b8c..fd7affd22a463eb 100644 --- a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt @@ -17,7 +17,7 @@ add_custom_command( add_custom_target(genconfusable DEPENDS Confusables.inc) set_target_properties(genconfusable PROPERTIES FOLDER "Clang Tools Extra/Sourcegenning") -add_clang_library(clangTidyMiscModule +add_clang_library(clangTidyMiscModule STATIC ConstCorrectnessCheck.cpp CoroutineHostileRAIICheck.cpp DefinitionsInHeadersCheck.cpp diff --git a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt index 4f68c487cac9d44..c919d49b42873ad 100644 --- a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyModernizeModule +add_clang_library(clangTidyModernizeModule STATIC AvoidBindCheck.cpp AvoidCArraysCheck.cpp ConcatNestedNamespacesCheck.cpp diff --git a/clang-tools-extra/clang-tidy/mpi/CMakeLists.txt b/clang-tools-extra/clang-tidy/mpi/CMakeLists.txt index 717683042f5247b..1232a28c7cf7be4 100644 --- a/clang-tools-extra/clang-tidy/mpi/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/mpi/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyMPIModule +add_clang_library(clangTidyMPIModule STATIC BufferDerefCheck.cpp MPITidyModule.cpp TypeMismatchCheck.cpp diff --git 
a/clang-tools-extra/clang-tidy/objc/CMakeLists.txt b/clang-tools-extra/clang-tidy/objc/CMakeLists.txt index aa428fce56a599b..e28d25deee84c18 100644 --- a/clang-tools-extra/clang-tidy/objc/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/objc/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyObjCModule +add_clang_library(clangTidyObjCModule STATIC AssertEquals.cpp AvoidNSErrorInitCheck.cpp DeallocInCategoryCheck.cpp diff --git a/clang-tools-extra/clang-tidy/openmp/CMakeLists.txt b/clang-tools-extra/clang-tidy/openmp/CMakeLists.txt index 4ef61f88c050978..acee08a48aeff39 100644 --- a/clang-tools-extra/clang-tidy/openmp/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/openmp/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyOpenMPModule +add_clang_library(clangTidyOpenMPModule STATIC ExceptionEscapeCheck.cpp OpenMPTidyModule.cpp UseDefaultNoneCheck.cpp diff --git a/clang-tools-extra/clang-tidy/performance/CMakeLists.txt b/clang-tools-extra/clang-tidy/performance/CMakeLists.txt index 81128ff086021ed..c6e547c5089fb0e 100644 --- a/clang-tools-extra/clang-tidy/performance/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/performance/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyPerformanceModule +add_clang_library(clangTidyPerformanceModule STATIC AvoidEndlCheck.cpp EnumSizeCheck.cpp FasterStringFindCheck.cpp diff --git a/clang-tools-extra/clang-tidy/plugin/CMakeLists.txt b/clang-tools-extra/clang-tidy/plugin/CMakeLists.txt index 673da472a746806..aff2018b693b10a 100644 --- a/clang-tools-extra/clang-tidy/plugin/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/plugin/CMakeLists.txt @@ -1,4 +1,4 @@ -add_clang_library(clangTidyPlugin +add_clang_library(clangTidyPlugin STATIC ClangTidyPlugin.cpp LINK_LIBS diff --git a/clang-tools-extra/clang-tidy/portability/CMakeLists.txt b/clang-tools-extra/clang-tidy/portability/CMakeLists.txt index 
01a86d686daa76d..3f0b7d47207938a 100644 --- a/clang-tools-extra/clang-tidy/portability/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/portability/CMakeLists.txt @@ -4,7 +4,7 @@ set(LLVM_LINK_COMPONENTS TargetParser ) -add_clang_library(clangTidyPortabilityModule +add_clang_library(clangTidyPortabilityModule STATIC PortabilityTidyModule.cpp RestrictSystemIncludesCheck.cpp SIMDIntrinsicsCheck.cpp diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt index 41065fc8e878590..8f303c51e1b0da2 100644 --- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyReadabilityModule +add_clang_library(clangTidyReadabilityModule STATIC AvoidConstParamsInDecls.cpp AvoidNestedConditionalOperatorCheck.cpp AvoidReturnWithVoidValueCheck.cpp diff --git a/clang-tools-extra/clang-tidy/tool/CMakeLists.txt b/clang-tools-extra/clang-tidy/tool/CMakeLists.txt index b220cbea80f1b6d..81fba3bbf12fe4a 100644 --- a/clang-tools-extra/clang-tidy/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/tool/CMakeLists.txt @@ -9,7 +9,7 @@ set(LLVM_LINK_COMPONENTS # Needed by LLVM's CMake checks because this file defines multiple targets. 
set(LLVM_OPTIONAL_SOURCES ClangTidyMain.cpp ClangTidyToolMain.cpp) -add_clang_library(clangTidyMain +add_clang_library(clangTidyMain STATIC ClangTidyMain.cpp LINK_LIBS diff --git a/clang-tools-extra/clang-tidy/utils/CMakeLists.txt b/clang-tools-extra/clang-tidy/utils/CMakeLists.txt index 504c6e928bdad00..b83a1e9a77182b9 100644 --- a/clang-tools-extra/clang-tidy/utils/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/utils/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyUtils +add_clang_library(clangTidyUtils STATIC Aliasing.cpp ASTUtils.cpp BracesAroundStatement.cpp diff --git a/clang-tools-extra/clang-tidy/zircon/CMakeLists.txt b/clang-tools-extra/clang-tidy/zircon/CMakeLists.txt index cd605d6d6c108dc..e08fe80e730ac5c 100644 --- a/clang-tools-extra/clang-tidy/zircon/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/zircon/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangTidyZirconModule +add_clang_library(clangTidyZirconModule STATIC TemporaryObjectsCheck.cpp ZirconTidyModule.cpp diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 8dcbf5f47e056a1..d797ddce8c44d12 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -61,7 +61,7 @@ endif() include_directories(BEFORE "${CMAKE_CURRENT_BINARY_DIR}/../clang-tidy") include_directories(BEFORE "${CMAKE_CURRENT_SOURCE_DIR}/../include-cleaner/include") -add_clang_library(clangDaemon +add_clang_library(clangDaemon STATIC AST.cpp ASTSignals.cpp ClangdLSPServer.cpp diff --git a/clang-tools-extra/clangd/index/remote/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/CMakeLists.txt index 106bbeff84ccf35..28df71855a14214 100644 --- a/clang-tools-extra/clangd/index/remote/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/remote/CMakeLists.txt @@ -19,7 +19,7 @@ if (CLANGD_ENABLE_REMOTE) # target-local? 
add_definitions(-DGOOGLE_PROTOBUF_NO_RTTI=1) - add_clang_library(clangdRemoteIndex + add_clang_library(clangdRemoteIndex STATIC Client.cpp LINK_LIBS diff --git a/clang-tools-extra/clangd/index/remote/marshalling/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/marshalling/CMakeLists.txt index 071802a96264716..d7e37003e5f984a 100644 --- a/clang-tools-extra/clangd/index/remote/marshalling/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/remote/marshalling/CMakeLists.txt @@ -1,4 +1,4 @@ -add_clang_library(clangdRemoteMarshalling +add_clang_library(clangdRemoteMarshalling STATIC Marshalling.cpp LINK_LIBS diff --git a/clang-tools-extra/clangd/index/remote/unimplemented/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/unimplemented/CMakeLists.txt index 86d13c77ce87461..5c4907f0d7a8326 100644 --- a/clang-tools-extra/clangd/index/remote/unimplemented/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/remote/unimplemented/CMakeLists.txt @@ -2,7 +2,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../) # When compiled without Remote Index support, the real implementation index # client is not present. Users will get a notification about this when trying # to connect to remote index server instance. 
-add_clang_library(clangdRemoteIndex +add_clang_library(clangdRemoteIndex STATIC UnimplementedClient.cpp LINK_LIBS diff --git a/clang-tools-extra/clangd/support/CMakeLists.txt b/clang-tools-extra/clangd/support/CMakeLists.txt index 506a3f2c8551d5c..c0be846ebbbf433 100644 --- a/clang-tools-extra/clangd/support/CMakeLists.txt +++ b/clang-tools-extra/clangd/support/CMakeLists.txt @@ -15,7 +15,7 @@ if(NOT HAVE_CXX_ATOMICS_WITHOUT_LIB OR NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB) list(APPEND CLANGD_ATOMIC_LIB "atomic") endif() -add_clang_library(clangdSupport +add_clang_library(clangdSupport STATIC Bracket.cpp Cancellation.cpp Context.cpp diff --git a/clang-tools-extra/clangd/tool/CMakeLists.txt b/clang-tools-extra/clangd/tool/CMakeLists.txt index 4012b6401c00803..850a82833e44436 100644 --- a/clang-tools-extra/clangd/tool/CMakeLists.txt +++ b/clang-tools-extra/clangd/tool/CMakeLists.txt @@ -1,7 +1,7 @@ # Needed by LLVM's CMake checks because this file defines multiple targets. set(LLVM_OPTIONAL_SOURCES ClangdToolMain.cpp) -add_clang_library(clangdMain +add_clang_library(clangdMain STATIC ClangdMain.cpp Check.cpp ) diff --git a/clang-tools-extra/clangd/xpc/CMakeLists.txt b/clang-tools-extra/clangd/xpc/CMakeLists.txt index b35c509a3f53dbd..f05d949646731c8 100644 --- a/clang-tools-extra/clangd/xpc/CMakeLists.txt +++ b/clang-tools-extra/clangd/xpc/CMakeLists.txt @@ -14,12 +14,12 @@ set(LLVM_LINK_COMPONENTS # Needed by LLVM's CMake checks because this file defines multiple targets. 
set(LLVM_OPTIONAL_SOURCES Conversion.cpp XPCTransport.cpp) -add_clang_library(clangdXpcJsonConversions +add_clang_library(clangdXpcJsonConversions STATIC Conversion.cpp LINK_LIBS clangDaemon clangdSupport ) -add_clang_library(clangdXpcTransport +add_clang_library(clangdXpcTransport STATIC XPCTransport.cpp LINK_LIBS clangDaemon clangdSupport clangdXpcJsonConversions DEPENDS ClangDriverOptions diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst index e38141bdb8be1f2..a4233d5d8e26942 100644 --- a/clang-tools-extra/docs/clang-tidy/index.rst +++ b/clang-tools-extra/docs/clang-tidy/index.rst @@ -287,7 +287,7 @@ An overview of all the command-line options: FormatStyle - Same as '--format-style'. HeaderFileExtensions - File extensions to consider to determine if a given diagnostic is located in a header file. - HeaderFilterRegex - Same as '--header-filter-regex'. + HeaderFilterRegex - Same as '--header-filter'. ImplementationFileExtensions - File extensions to consider to determine if a given diagnostic is located in an implementation file. diff --git a/clang-tools-extra/include-cleaner/lib/CMakeLists.txt b/clang-tools-extra/include-cleaner/lib/CMakeLists.txt index 208791a1a7f1ed0..7ad5325f6026dd6 100644 --- a/clang-tools-extra/include-cleaner/lib/CMakeLists.txt +++ b/clang-tools-extra/include-cleaner/lib/CMakeLists.txt @@ -1,6 +1,6 @@ set(LLVM_LINK_COMPONENTS Support) -add_clang_library(clangIncludeCleaner +add_clang_library(clangIncludeCleaner STATIC Analysis.cpp IncludeSpeller.cpp FindHeaders.cpp diff --git a/clang/CodeOwners.rst b/clang/Maintainers.rst similarity index 87% rename from clang/CodeOwners.rst rename to clang/Maintainers.rst index f067b7183ae7370..ee5334b02f7000a 100644 --- a/clang/CodeOwners.rst +++ b/clang/Maintainers.rst @@ -1,36 +1,36 @@ ================= -Clang Code Owners +Clang Maintainers ================= This file is a list of the -`code owners `_ for +`maintainers `_ for Clang. .. 
contents:: :depth: 2 :local: -Current Code Owners -=================== -The following people are the active code owners for the project. Please reach +Active Maintainers +================== +The following people are the active maintainers for the project. Please reach out to them for code reviews, questions about their area of expertise, or other assistance. -All parts of Clang not covered by someone else ---------------------------------------------- +Lead Maintainer +--------------- | Aaron Ballman | aaron\@aaronballman.com (email), aaron.ballman (Phabricator), AaronBallman (GitHub), AaronBallman (Discourse), aaronballman (Discord), AaronBallman (IRC) Contained Components -------------------- -These code owners are responsible for particular high-level components within +These maintainers are responsible for particular high-level components within Clang that are typically contained to one area of the compiler. AST matchers ~~~~~~~~~~~~ -| Manuel Klimek -| klimek\@google.com (email), klimek (Phabricator), r4nt (GitHub) +| Aaron Ballman +| aaron\@aaronballman.com (email), aaron.ballman (Phabricator), AaronBallman (GitHub), AaronBallman (Discourse), aaronballman (Discord), AaronBallman (IRC) Clang LLVM IR generation @@ -60,7 +60,7 @@ Analysis & CFG Experimental new constant interpreter ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | Timm Bäder -| tbaeder\@redhat.com (email), tbaeder (Phabricator), tbaederr (GitHub), tbaeder (Discourse), tbaeder (Discord) +| tbaeder\@redhat.com (email), tbaeder (Phabricator), tbaederr (GitHub), tbaeder (Discourse), tbaeder (Discord) Modules & serialization @@ -125,14 +125,9 @@ Driver parts not covered by someone else Tools ----- -These code owners are responsible for user-facing tools under the Clang +These maintainers are responsible for user-facing tools under the Clang umbrella or components used to support such tools. 
-Tooling library -~~~~~~~~~~~~~~~ -| Manuel Klimek -| klimek\@google.com (email), klimek (Phabricator), r4nt (GitHub) - clang-format ~~~~~~~~~~~~ @@ -255,19 +250,20 @@ SYCL conformance | alexey.bader\@intel.com (email), bader (Phabricator), bader (GitHub) -Former Code Owners -================== -The following people have graciously spent time performing code ownership +Inactive Maintainers +==================== +The following people have graciously spent time performing maintainership responsibilities but are no longer active in that role. Thank you for all your help with the success of the project! -Emeritus owners ---------------- +Emeritus Lead Maintainers +------------------------- | Doug Gregor (dgregor\@apple.com) | Richard Smith (richard\@metafoo.co.uk) -Former component owners ------------------------ +Inactive component maintainers +------------------------------ | Chandler Carruth (chandlerc\@gmail.com, chandlerc\@google.com) -- CMake, library layering | Devin Coughlin (dcoughlin\@apple.com) -- Clang static analyzer +| Manuel Klimek (klimek\@google.com (email), klimek (Phabricator), r4nt (GitHub)) -- Tooling, AST matchers diff --git a/clang/docs/CMakeLists.txt b/clang/docs/CMakeLists.txt index 51e9db29f887f3b..4fecc007f599541 100644 --- a/clang/docs/CMakeLists.txt +++ b/clang/docs/CMakeLists.txt @@ -117,7 +117,7 @@ if (LLVM_ENABLE_SPHINX) "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" COMMAND "${CMAKE_COMMAND}" -E copy_if_different - "${CMAKE_CURRENT_SOURCE_DIR}/../CodeOwners.rst" + "${CMAKE_CURRENT_SOURCE_DIR}/../Maintainers.rst" "${CMAKE_CURRENT_BINARY_DIR}" ) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 7a1508ee858c41b..a9bfb4c4a0fcb2c 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -249,7 +249,7 @@ the configuration (without a prefix: ``Auto``). .. 
_AlignArrayOfStructures: **AlignArrayOfStructures** (``ArrayInitializerAlignmentStyle``) :versionbadge:`clang-format 13` :ref:`¶ ` - if not ``None``, when using initialization for an array of structs + If not ``None``, when using initialization for an array of structs aligns the fields into columns. @@ -307,11 +307,12 @@ the configuration (without a prefix: ``Auto``). Alignment options. They can also be read as a whole for compatibility. The choices are: - - None - - Consecutive - - AcrossEmptyLines - - AcrossComments - - AcrossEmptyLinesAndComments + + * ``None`` + * ``Consecutive`` + * ``AcrossEmptyLines`` + * ``AcrossComments`` + * ``AcrossEmptyLinesAndComments`` For example, to align across empty lines and not across comments, either of these work. @@ -464,11 +465,12 @@ the configuration (without a prefix: ``Auto``). Alignment options. They can also be read as a whole for compatibility. The choices are: - - None - - Consecutive - - AcrossEmptyLines - - AcrossComments - - AcrossEmptyLinesAndComments + + * ``None`` + * ``Consecutive`` + * ``AcrossEmptyLines`` + * ``AcrossComments`` + * ``AcrossEmptyLinesAndComments`` For example, to align across empty lines and not across comments, either of these work. @@ -621,11 +623,12 @@ the configuration (without a prefix: ``Auto``). Alignment options. They can also be read as a whole for compatibility. The choices are: - - None - - Consecutive - - AcrossEmptyLines - - AcrossComments - - AcrossEmptyLinesAndComments + + * ``None`` + * ``Consecutive`` + * ``AcrossEmptyLines`` + * ``AcrossComments`` + * ``AcrossEmptyLinesAndComments`` For example, to align across empty lines and not across comments, either of these work. @@ -779,11 +782,12 @@ the configuration (without a prefix: ``Auto``). Alignment options. They can also be read as a whole for compatibility. 
The choices are: - - None - - Consecutive - - AcrossEmptyLines - - AcrossComments - - AcrossEmptyLinesAndComments + + * ``None`` + * ``Consecutive`` + * ``AcrossEmptyLines`` + * ``AcrossComments`` + * ``AcrossEmptyLinesAndComments`` For example, to align across empty lines and not across comments, either of these work. @@ -1056,11 +1060,12 @@ the configuration (without a prefix: ``Auto``). Alignment options. They can also be read as a whole for compatibility. The choices are: - - None - - Consecutive - - AcrossEmptyLines - - AcrossComments - - AcrossEmptyLinesAndComments + + * ``None`` + * ``Consecutive`` + * ``AcrossEmptyLines`` + * ``AcrossComments`` + * ``AcrossEmptyLinesAndComments`` For example, to align across empty lines and not across comments, either of these work. @@ -1211,11 +1216,12 @@ the configuration (without a prefix: ``Auto``). Alignment options. They can also be read as a whole for compatibility. The choices are: - - None - - Consecutive - - AcrossEmptyLines - - AcrossComments - - AcrossEmptyLinesAndComments + + * ``None`` + * ``Consecutive`` + * ``AcrossEmptyLines`` + * ``AcrossComments`` + * ``AcrossEmptyLinesAndComments`` For example, to align across empty lines and not across comments, either of these work. @@ -1366,11 +1372,12 @@ the configuration (without a prefix: ``Auto``). Alignment options. They can also be read as a whole for compatibility. The choices are: - - None - - Consecutive - - AcrossEmptyLines - - AcrossComments - - AcrossEmptyLinesAndComments + + * ``None`` + * ``Consecutive`` + * ``AcrossEmptyLines`` + * ``AcrossComments`` + * ``AcrossEmptyLinesAndComments`` For example, to align across empty lines and not across comments, either of these work. diff --git a/clang/docs/CodeOwners.rst b/clang/docs/CodeOwners.rst deleted file mode 100644 index 48128fbc5d909ba..000000000000000 --- a/clang/docs/CodeOwners.rst +++ /dev/null @@ -1 +0,0 @@ -.. 
include:: ../CodeOwners.rst diff --git a/clang/docs/Maintainers.rst b/clang/docs/Maintainers.rst new file mode 100644 index 000000000000000..7e69c1165d2f54f --- /dev/null +++ b/clang/docs/Maintainers.rst @@ -0,0 +1 @@ +.. include:: ../Maintainers.rst diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5cc9c8047a70ef2..21c6e3c98e87a4e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -378,6 +378,8 @@ Improvements to Clang's diagnostics - Clang now emits a diagnostic note at the class declaration when the method definition does not match any declaration (#GH110638). +- Clang now omits warnings for extra parentheses in fold expressions with single expansion (#GH101863). + Improvements to Clang's time-trace ---------------------------------- @@ -393,6 +395,8 @@ Bug Fixes in This Version - Fixed a crash when trying to transform a dependent address space type. Fixes #GH101685. - Fixed a crash when diagnosing format strings and encountering an empty delimited escape sequence (e.g., ``"\o{}"``). #GH102218 +- The warning emitted for an unsupported register variable type now points to + the unsupported type instead of the ``register`` keyword (#GH109776). Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -471,8 +475,12 @@ Bug Fixes to C++ Support - Fixed an issue deducing non-type template arguments of reference type. (#GH73460) - Fixed an issue in constraint evaluation, where type constraints on the lambda expression containing outer unexpanded parameters were not correctly expanded. (#GH101754) +- Fixes crashes with function template member specializations, and increases + conformance of explicit instantiation behaviour with MSVC. (#GH111266) - Fixed a bug in constraint expression comparison where the ``sizeof...`` expression was not handled properly in certain friend declarations. (#GH93099) +- Clang now instantiates the correct lambda call operator when a lambda's class type is + merged across modules. 
(#GH110401) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -510,6 +518,10 @@ OpenACC Specific Changes Target Specific Changes ----------------------- +- Clang now implements the Solaris-specific mangling of ``std::tm`` as + ``tm``, same for ``std::div_t``, ``std::ldiv_t``, and + ``std::lconv``, for Solaris ABI compatibility. (#GH33114) + AMDGPU Support ^^^^^^^^^^^^^^ @@ -636,8 +648,8 @@ New features if class of allocation and deallocation function mismatches. `Documentation `__. -- Function effects, e.g. the ``nonblocking`` and ``nonallocating`` "performance constraint" - attributes, are now verified. For example, for functions declared with the ``nonblocking`` +- Function effects, e.g. the ``nonblocking`` and ``nonallocating`` "performance constraint" + attributes, are now verified. For example, for functions declared with the ``nonblocking`` attribute, the compiler can generate warnings about the use of any language features, or calls to other functions, which may block. diff --git a/clang/docs/index.rst b/clang/docs/index.rst index 4a497f4d9bcc3c3..f4fdc93290a0d96 100644 --- a/clang/docs/index.rst +++ b/clang/docs/index.rst @@ -104,7 +104,7 @@ Design Documents .. 
toctree:: :maxdepth: 1 - CodeOwners + Maintainers InternalsManual DriverInternals Multilib diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 57353855c51e7cb..607bf313c4d95e5 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -2170,11 +2170,13 @@ class SYCLUniqueStableNameExpr final : public Expr { class ParenExpr : public Expr { SourceLocation L, R; Stmt *Val; + public: ParenExpr(SourceLocation l, SourceLocation r, Expr *val) : Expr(ParenExprClass, val->getType(), val->getValueKind(), val->getObjectKind()), L(l), R(r), Val(val) { + ParenExprBits.ProducedByFoldExpansion = false; setDependence(computeDependence(this)); } @@ -2206,6 +2208,13 @@ class ParenExpr : public Expr { const_child_range children() const { return const_child_range(&Val, &Val + 1); } + + bool isProducedByFoldExpansion() const { + return ParenExprBits.ProducedByFoldExpansion != 0; + } + void setIsProducedByFoldExpansion(bool ProducedByFoldExpansion = true) { + ParenExprBits.ProducedByFoldExpansion = ProducedByFoldExpansion; + } }; /// UnaryOperator - This represents the unary-expression's (except sizeof and diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index 7aed83e9c68bb7b..83fafbabb1d460c 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -719,6 +719,18 @@ class alignas(void *) Stmt { unsigned Kind : 3; }; + class ParenExprBitfields { + friend class ASTStmtReader; + friend class ASTStmtWriter; + friend class ParenExpr; + + LLVM_PREFERRED_TYPE(ExprBitfields) + unsigned : NumExprBits; + + LLVM_PREFERRED_TYPE(bool) + unsigned ProducedByFoldExpansion : 1; + }; + class StmtExprBitfields { friend class ASTStmtReader; friend class StmtExpr; @@ -1241,6 +1253,7 @@ class alignas(void *) Stmt { GenericSelectionExprBitfields GenericSelectionExprBits; PseudoObjectExprBitfields PseudoObjectExprBits; SourceLocExprBitfields SourceLocExprBits; + ParenExprBitfields ParenExprBits; // 
GNU Extensions. StmtExprBitfields StmtExprBits; diff --git a/clang/include/clang/CIR/.clang-tidy b/clang/include/clang/CIR/.clang-tidy new file mode 100644 index 000000000000000..aaba4585494d66c --- /dev/null +++ b/clang/include/clang/CIR/.clang-tidy @@ -0,0 +1,62 @@ +InheritParentConfig: true +Checks: > + -misc-const-correctness, + -llvm-header-guard, + bugprone-argument-comment, + bugprone-assert-side-effect, + bugprone-branch-clone, + bugprone-copy-constructor-init, + bugprone-dangling-handle, + bugprone-dynamic-static-initializers, + bugprone-macro-parentheses, + bugprone-macro-repeated-side-effects, + bugprone-misplaced-widening-cast, + bugprone-move-forwarding-reference, + bugprone-multiple-statement-macro, + bugprone-suspicious-semicolon, + bugprone-swapped-arguments, + bugprone-terminating-continue, + bugprone-unused-raii, + bugprone-unused-return-value, + misc-redundant-expression, + misc-static-assert, + misc-unused-using-decls, + modernize-use-bool-literals, + modernize-loop-convert, + modernize-make-unique, + modernize-raw-string-literal, + modernize-use-equals-default, + modernize-use-default-member-init, + modernize-use-emplace, + modernize-use-nullptr, + modernize-use-override, + modernize-use-using, + performance-for-range-copy, + performance-implicit-conversion-in-loop, + performance-inefficient-algorithm, + performance-inefficient-vector-operation, + performance-move-const-arg, + performance-no-automatic-move, + performance-trivially-destructible, + performance-unnecessary-copy-initialization, + performance-unnecessary-value-param, + readability-avoid-const-params-in-decls, + readability-const-return-type, + readability-container-size-empty, + readability-identifier-naming, + readability-inconsistent-declaration-parameter-name, + readability-misleading-indentation, + readability-redundant-control-flow, + readability-redundant-smartptr-get, + readability-simplify-boolean-expr, + readability-simplify-subscript-expr, + readability-use-anyofallof + + 
+CheckOptions: + - key: readability-identifier-naming.MemberCase + value: camelBack + - key: readability-identifier-naming.ParameterCase + value: camelBack + - key: readability-identifier-naming.VariableCase + value: camelBack diff --git a/clang/include/clang/CIRFrontendAction/.clang-tidy b/clang/include/clang/CIRFrontendAction/.clang-tidy new file mode 100644 index 000000000000000..ef88dbcec488c8c --- /dev/null +++ b/clang/include/clang/CIRFrontendAction/.clang-tidy @@ -0,0 +1,53 @@ +InheritParentConfig: true +Checks: > + -misc-const-correctness, + -llvm-header-guard, + bugprone-argument-comment, + bugprone-assert-side-effect, + bugprone-branch-clone, + bugprone-copy-constructor-init, + bugprone-dangling-handle, + bugprone-dynamic-static-initializers, + bugprone-macro-parentheses, + bugprone-macro-repeated-side-effects, + bugprone-misplaced-widening-cast, + bugprone-move-forwarding-reference, + bugprone-multiple-statement-macro, + bugprone-suspicious-semicolon, + bugprone-swapped-arguments, + bugprone-terminating-continue, + bugprone-unused-raii, + bugprone-unused-return-value, + misc-redundant-expression, + misc-static-assert, + misc-unused-using-decls, + modernize-use-bool-literals, + modernize-loop-convert, + modernize-make-unique, + modernize-raw-string-literal, + modernize-use-equals-default, + modernize-use-default-member-init, + modernize-use-emplace, + modernize-use-nullptr, + modernize-use-override, + modernize-use-using, + performance-for-range-copy, + performance-implicit-conversion-in-loop, + performance-inefficient-algorithm, + performance-inefficient-vector-operation, + performance-move-const-arg, + performance-no-automatic-move, + performance-trivially-destructible, + performance-unnecessary-copy-initialization, + performance-unnecessary-value-param, + readability-avoid-const-params-in-decls, + readability-const-return-type, + readability-container-size-empty, + readability-identifier-naming, + readability-inconsistent-declaration-parameter-name, + 
readability-misleading-indentation, + readability-redundant-control-flow, + readability-redundant-smartptr-get, + readability-simplify-boolean-expr, + readability-simplify-subscript-expr, + readability-use-anyofallof diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 82cd863e615a871..3d3e4330902a300 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -131,7 +131,7 @@ struct FormatStyle { /// Don't align array initializer columns. AIAS_None }; - /// if not ``None``, when using initialization for an array of structs + /// If not ``None``, when using initialization for an array of structs /// aligns the fields into columns. /// /// \note @@ -145,11 +145,12 @@ struct FormatStyle { /// Alignment options. /// /// They can also be read as a whole for compatibility. The choices are: - /// - None - /// - Consecutive - /// - AcrossEmptyLines - /// - AcrossComments - /// - AcrossEmptyLinesAndComments + /// + /// * ``None`` + /// * ``Consecutive`` + /// * ``AcrossEmptyLines`` + /// * ``AcrossComments`` + /// * ``AcrossEmptyLinesAndComments`` /// /// For example, to align across empty lines and not across comments, either /// of these work. 
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index a81429ad6a23804..034fbbe0bc7829f 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -14325,9 +14325,17 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap &FeatureMap, Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); } } else if (const auto *TV = FD->getAttr()) { - llvm::SmallVector Feats; - TV->getFeatures(Feats); - std::vector Features = getFMVBackendFeaturesFor(Feats); + std::vector Features; + if (Target->getTriple().isRISCV()) { + ParsedTargetAttr ParsedAttr = Target->parseTargetAttr(TV->getName()); + Features.insert(Features.begin(), ParsedAttr.Features.begin(), + ParsedAttr.Features.end()); + } else { + assert(Target->getTriple().isAArch64()); + llvm::SmallVector Feats; + TV->getFeatures(Feats); + Features = getFMVBackendFeaturesFor(Feats); + } Features.insert(Features.begin(), Target->getTargetOpts().FeaturesAsWritten.begin(), Target->getTargetOpts().FeaturesAsWritten.end()); diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 72c94e6fad3e0da..98381254886e294 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1184,6 +1184,10 @@ static bool interp__builtin_ia32_bzhi(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const Function *Func, const CallExpr *Call) { + QualType CallType = Call->getType(); + if (!CallType->isIntegerType()) + return false; + PrimType ValT = *S.Ctx.classify(Call->getArg(0)); PrimType IndexT = *S.Ctx.classify(Call->getArg(1)); @@ -1197,7 +1201,7 @@ static bool interp__builtin_ia32_bzhi(InterpState &S, CodePtr OpPC, if (Index < BitWidth) Val.clearHighBits(BitWidth - Index); - pushInteger(S, Val, Call->getType()); + pushInteger(S, Val, CallType); return true; } @@ -1210,7 +1214,7 @@ static bool interp__builtin_ia32_lzcnt(InterpState &S, CodePtr OpPC, return false; APSInt Val = 
peekToAPSInt(S.Stk, *S.Ctx.classify(Call->getArg(0))); - pushInteger(S, Val.countLeadingZeros(), Call->getType()); + pushInteger(S, Val.countLeadingZeros(), CallType); return true; } @@ -1223,7 +1227,7 @@ static bool interp__builtin_ia32_tzcnt(InterpState &S, CodePtr OpPC, return false; APSInt Val = peekToAPSInt(S.Stk, *S.Ctx.classify(Call->getArg(0))); - pushInteger(S, Val.countTrailingZeros(), Call->getType()); + pushInteger(S, Val.countTrailingZeros(), CallType); return true; } diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index f5a0aa8f82512e8..1364ccc745ba016 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -1631,13 +1631,42 @@ static bool allLookupResultsAreTheSame(const DeclContext::lookup_result &R) { static NamedDecl* getLambdaCallOperatorHelper(const CXXRecordDecl &RD) { if (!RD.isLambda()) return nullptr; DeclarationName Name = - RD.getASTContext().DeclarationNames.getCXXOperatorName(OO_Call); - DeclContext::lookup_result Calls = RD.lookup(Name); + RD.getASTContext().DeclarationNames.getCXXOperatorName(OO_Call); + DeclContext::lookup_result Calls = RD.lookup(Name); assert(!Calls.empty() && "Missing lambda call operator!"); assert(allLookupResultsAreTheSame(Calls) && "More than one lambda call operator!"); - return Calls.front(); + + // FIXME: If we have multiple call operators, we might be in a situation + // where we merged this lambda with one from another module; in that + // case, return our method (instead of that of the other lambda). + // + // This avoids situations where, given two modules A and B, if we + // try to instantiate A's call operator in a function in B, anything + // in the call operator that relies on local decls in the surrounding + // function will crash because it tries to find A's decls, but we only + // instantiated B's: + // + // template + // void f() { + // using T = int; // We only instantiate B's version of this. 
+ // auto L = [](T) { }; // But A's call operator would want A's here. + // } + // + // Walk the call operator’s redecl chain to find the one that belongs + // to this module. + // + // TODO: We need to fix this properly (see + // https://github.com/llvm/llvm-project/issues/90154). + Module *M = RD.getOwningModule(); + for (Decl *D : Calls.front()->redecls()) { + auto *MD = cast(D); + if (MD->getOwningModule() == M) + return MD; + } + + llvm_unreachable("Couldn't find our call operator!"); } FunctionTemplateDecl* CXXRecordDecl::getDependentLambdaCallOperator() const { diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 769a863c2b6764a..777cdca1a0c0d76 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -1164,8 +1164,25 @@ void CXXNameMangler::mangleUnscopedName(GlobalDecl GD, const DeclContext *DC, // ::= St # ::std:: assert(!isa(DC) && "unskipped LinkageSpecDecl"); - if (isStdNamespace(DC)) + if (isStdNamespace(DC)) { + if (getASTContext().getTargetInfo().getTriple().isOSSolaris()) { + const NamedDecl *ND = cast(GD.getDecl()); + if (const RecordDecl *RD = dyn_cast(ND)) { + // Issue #33114: Need non-standard mangling of std::tm etc. for + // Solaris ABI compatibility. 
+ // + // ::= tm # ::std::tm, same for the others + if (const IdentifierInfo *II = RD->getIdentifier()) { + StringRef type = II->getName(); + if (llvm::is_contained({"div_t", "ldiv_t", "lconv", "tm"}, type)) { + Out << type.size() << type; + return; + } + } + } + } Out << "St"; + } mangleUnqualifiedName(GD, DC, AdditionalAbiTags); } diff --git a/clang/lib/CIR/.clang-tidy b/clang/lib/CIR/.clang-tidy new file mode 100644 index 000000000000000..aaba4585494d66c --- /dev/null +++ b/clang/lib/CIR/.clang-tidy @@ -0,0 +1,62 @@ +InheritParentConfig: true +Checks: > + -misc-const-correctness, + -llvm-header-guard, + bugprone-argument-comment, + bugprone-assert-side-effect, + bugprone-branch-clone, + bugprone-copy-constructor-init, + bugprone-dangling-handle, + bugprone-dynamic-static-initializers, + bugprone-macro-parentheses, + bugprone-macro-repeated-side-effects, + bugprone-misplaced-widening-cast, + bugprone-move-forwarding-reference, + bugprone-multiple-statement-macro, + bugprone-suspicious-semicolon, + bugprone-swapped-arguments, + bugprone-terminating-continue, + bugprone-unused-raii, + bugprone-unused-return-value, + misc-redundant-expression, + misc-static-assert, + misc-unused-using-decls, + modernize-use-bool-literals, + modernize-loop-convert, + modernize-make-unique, + modernize-raw-string-literal, + modernize-use-equals-default, + modernize-use-default-member-init, + modernize-use-emplace, + modernize-use-nullptr, + modernize-use-override, + modernize-use-using, + performance-for-range-copy, + performance-implicit-conversion-in-loop, + performance-inefficient-algorithm, + performance-inefficient-vector-operation, + performance-move-const-arg, + performance-no-automatic-move, + performance-trivially-destructible, + performance-unnecessary-copy-initialization, + performance-unnecessary-value-param, + readability-avoid-const-params-in-decls, + readability-const-return-type, + readability-container-size-empty, + readability-identifier-naming, + 
readability-inconsistent-declaration-parameter-name, + readability-misleading-indentation, + readability-redundant-control-flow, + readability-redundant-smartptr-get, + readability-simplify-boolean-expr, + readability-simplify-subscript-expr, + readability-use-anyofallof + + +CheckOptions: + - key: readability-identifier-naming.MemberCase + value: camelBack + - key: readability-identifier-naming.ParameterCase + value: camelBack + - key: readability-identifier-naming.VariableCase + value: camelBack diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 52d2f6d52abf949..451442765620f7d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1096,7 +1096,7 @@ class StructAccessBase } const Expr *VisitCastExpr(const CastExpr *E) { if (E->getCastKind() == CK_LValueToRValue) - return E; + return IsExpectedRecordDecl(E) ? E : nullptr; return Visit(E->getSubExpr()); } const Expr *VisitParenExpr(const ParenExpr *E) { diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 25c1c496a4f27f5..5ba098144a74e79 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4287,8 +4287,13 @@ void CodeGenModule::emitMultiVersionFunctions() { } else if (const auto *TVA = CurFD->getAttr()) { if (TVA->isDefaultVersion() && IsDefined) ShouldEmitResolver = true; - TVA->getFeatures(Feats); llvm::Function *Func = createFunction(CurFD); + if (getTarget().getTriple().isRISCV()) { + Feats.push_back(TVA->getName()); + } else { + assert(getTarget().getTriple().isAArch64()); + TVA->getFeatures(Feats); + } Options.emplace_back(Func, /*Architecture*/ "", Feats); } else if (const auto *TC = CurFD->getAttr()) { if (IsDefined) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 01b4b6f0b0d2c1c..f97127f09d2096b 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -50,29 +50,25 @@ template <> struct MappingTraits { {/*Enabled=*/true, 
/*AcrossEmptyLines=*/false, /*AcrossComments=*/false, /*AlignCompound=*/false, /*AlignFunctionDeclarations=*/true, - /*AlignFunctionPointers=*/false, - /*PadOperators=*/true})); + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); IO.enumCase(Value, "AcrossEmptyLines", FormatStyle::AlignConsecutiveStyle( {/*Enabled=*/true, /*AcrossEmptyLines=*/true, /*AcrossComments=*/false, /*AlignCompound=*/false, /*AlignFunctionDeclarations=*/true, - /*AlignFunctionPointers=*/false, - /*PadOperators=*/true})); + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); IO.enumCase(Value, "AcrossComments", FormatStyle::AlignConsecutiveStyle( {/*Enabled=*/true, /*AcrossEmptyLines=*/false, /*AcrossComments=*/true, /*AlignCompound=*/false, /*AlignFunctionDeclarations=*/true, - /*AlignFunctionPointers=*/false, - /*PadOperators=*/true})); + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); IO.enumCase(Value, "AcrossEmptyLinesAndComments", FormatStyle::AlignConsecutiveStyle( {/*Enabled=*/true, /*AcrossEmptyLines=*/true, /*AcrossComments=*/true, /*AlignCompound=*/false, /*AlignFunctionDeclarations=*/true, - /*AlignFunctionPointers=*/false, - /*PadOperators=*/true})); + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); // For backward compatibility. 
IO.enumCase(Value, "true", @@ -80,8 +76,7 @@ template <> struct MappingTraits { {/*Enabled=*/true, /*AcrossEmptyLines=*/false, /*AcrossComments=*/false, /*AlignCompound=*/false, /*AlignFunctionDeclarations=*/true, - /*AlignFunctionPointers=*/false, - /*PadOperators=*/true})); + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); IO.enumCase(Value, "false", FormatStyle::AlignConsecutiveStyle({})); } @@ -1445,11 +1440,6 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align; LLVMStyle.AlignArrayOfStructures = FormatStyle::AIAS_None; LLVMStyle.AlignConsecutiveAssignments = {}; - LLVMStyle.AlignConsecutiveAssignments.AcrossComments = false; - LLVMStyle.AlignConsecutiveAssignments.AcrossEmptyLines = false; - LLVMStyle.AlignConsecutiveAssignments.AlignCompound = false; - LLVMStyle.AlignConsecutiveAssignments.AlignFunctionPointers = false; - LLVMStyle.AlignConsecutiveAssignments.Enabled = false; LLVMStyle.AlignConsecutiveAssignments.PadOperators = true; LLVMStyle.AlignConsecutiveBitFields = {}; LLVMStyle.AlignConsecutiveDeclarations = {}; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index d537855fef45644..f6e5798057bbd28 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4910,6 +4910,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Left.is(tok::star) && Right.is(tok::comment)) return true; + const auto *BeforeLeft = Left.Previous; + if (IsCpp) { if (Left.is(TT_OverloadedOperator) && Right.isOneOf(TT_TemplateOpener, TT_TemplateCloser)) { @@ -4962,7 +4964,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Left.Tok.getIdentifierInfo() && Right.Tok.isLiteral()) return true; } else if (Style.isProto()) { - if (Right.is(tok::period) && + if (Right.is(tok::period) && !(BeforeLeft && BeforeLeft->is(tok::period)) && Left.isOneOf(Keywords.kw_optional, 
Keywords.kw_required, Keywords.kw_repeated, Keywords.kw_extend)) { return true; @@ -5070,8 +5072,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Left.is(TT_FatArrow)) return true; // for await ( ... - if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous && - Left.Previous->is(tok::kw_for)) { + if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && BeforeLeft && + BeforeLeft->is(tok::kw_for)) { return true; } if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) && @@ -5108,7 +5110,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return false; // Valid JS method names can include keywords, e.g. `foo.delete()` or // `bar.instanceof()`. Recognize call positions by preceding period. - if (Left.Previous && Left.Previous->is(tok::period) && + if (BeforeLeft && BeforeLeft->is(tok::period) && Left.Tok.getIdentifierInfo()) { return false; } @@ -5126,22 +5128,22 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, // "of" is only a keyword if it appears after another identifier // (e.g. as "const x of y" in a for loop), or after a destructuring // operation (const [x, y] of z, const {a, b} of c). 
- (Left.is(Keywords.kw_of) && Left.Previous && - (Left.Previous->is(tok::identifier) || - Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) && - (!Left.Previous || Left.Previous->isNot(tok::period))) { + (Left.is(Keywords.kw_of) && BeforeLeft && + (BeforeLeft->is(tok::identifier) || + BeforeLeft->isOneOf(tok::r_square, tok::r_brace)))) && + (!BeforeLeft || BeforeLeft->isNot(tok::period))) { return true; } - if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous && - Left.Previous->is(tok::period) && Right.is(tok::l_paren)) { + if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && BeforeLeft && + BeforeLeft->is(tok::period) && Right.is(tok::l_paren)) { return false; } if (Left.is(Keywords.kw_as) && Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) { return true; } - if (Left.is(tok::kw_default) && Left.Previous && - Left.Previous->is(tok::kw_export)) { + if (Left.is(tok::kw_default) && BeforeLeft && + BeforeLeft->is(tok::kw_export)) { return true; } if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace)) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 4f172c74b31cbb2..45e7eeb5327d0cb 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -175,12 +175,21 @@ typedef enum __attribute__((__always_inline__, __nodebug__, \ __target__("avx512f,no-evex512"))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + /* Create vectors with repeated elements */ -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_setzero_si512(void) -{ - return __extension__ (__m512i)(__v8di){ 0, 0, 
0, 0, 0, 0, 0, 0 }; +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_setzero_si512(void) { + return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0}; } #define _mm512_setzero_epi32 _mm512_setzero_si512 @@ -256,20 +265,16 @@ _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) (__v8di) _mm512_setzero_si512()); } - -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_setzero_ps(void) -{ - return __extension__ (__m512){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void) { + return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; } #define _mm512_setzero _mm512_setzero_ps -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_setzero_pd(void) -{ - return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_setzero_pd(void) { + return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; } static __inline __m512 __DEFAULT_FN_ATTRS512 @@ -9775,5 +9780,8 @@ _mm512_cvtsi512_si32(__m512i __A) { #undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS512_CONSTEXPR +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif /* __AVX512FINTRIN_H */ diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 73707c623065e73..bb43b292be01f6a 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -66,6 +66,14 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32))); __min_vector_width__(128))) #endif +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#else +#define 
__DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + /* Arithmetic */ /// Adds two 256-bit vectors of [4 x double]. /// @@ -4331,10 +4339,8 @@ _mm256_set1_epi64x(long long __q) /// This intrinsic corresponds to the VXORPS instruction. /// /// \returns A 256-bit vector of [4 x double] with all elements set to zero. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_setzero_pd(void) -{ - return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 }; +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void) { + return __extension__(__m256d){0.0, 0.0, 0.0, 0.0}; } /// Constructs a 256-bit floating-point vector of [8 x float] with all @@ -4345,9 +4351,7 @@ _mm256_setzero_pd(void) /// This intrinsic corresponds to the VXORPS instruction. /// /// \returns A 256-bit vector of [8 x float] with all elements set to zero. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_setzero_ps(void) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void) { return __extension__ (__m256){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; } @@ -4358,9 +4362,8 @@ _mm256_setzero_ps(void) /// This intrinsic corresponds to the VXORPS instruction. /// /// \returns A 256-bit integer vector initialized to zero. 
-static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_setzero_si256(void) -{ +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_setzero_si256(void) { return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 }; } @@ -5130,6 +5133,8 @@ _mm256_storeu2_m128i(__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a) } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR #endif /* __AVXINTRIN_H */ diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 72a32f953e01187..d2121408c114b5f 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -59,6 +59,12 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16))); __min_vector_width__(128))) #endif +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + #define __trunc64(x) \ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) #define __anyext128(x) \ @@ -1863,7 +1869,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, /// /// \returns An initialized 128-bit floating-point vector of [2 x double] with /// all elements set to zero. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void) { return __extension__(__m128d){0.0, 0.0}; } @@ -3862,7 +3868,7 @@ _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, /// /// \returns An initialized 128-bit integer vector with all elements set to /// zero. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void) { return __extension__(__m128i)(__v2di){0LL, 0LL}; } @@ -4900,6 +4906,7 @@ void _mm_pause(void); #undef __anyext128 #undef __trunc64 #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index 9ad76579668b35c..cd605df7fb52d88 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -27,6 +27,12 @@ __min_vector_width__(128))) #endif +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + /// Loads data from an unaligned memory location to elements in a 128-bit /// vector. /// @@ -128,7 +134,7 @@ _mm_hsub_ps(__m128 __a, __m128 __b) /// destination. /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated /// values. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movehdup_ps(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3); @@ -149,7 +155,7 @@ _mm_movehdup_ps(__m128 __a) /// destination. /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated /// values. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_moveldup_ps(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2); @@ -250,7 +256,7 @@ _mm_hsub_pd(__m128d __a, __m128d __b) /// [127:64] and [63:0] of the destination. /// \returns A 128-bit vector of [2 x double] containing the moved and /// duplicated values. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movedup_pd(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); @@ -303,5 +309,6 @@ _mm_mwait(unsigned __extensions, unsigned __hints) } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif /* __PMMINTRIN_H */ diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 2b62f573857ee83..27d09b4c8ee744d 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1604,10 +1604,12 @@ static bool isTargetVariantEnvironment(const TargetInfo &TI, return false; } -#if defined(__sun__) && defined(__svr4__) +#if defined(__sun__) && defined(__svr4__) && defined(__clang__) && \ + __clang__ < 20 // GCC mangles std::tm as tm for binary compatibility on Solaris (Issue // #33114). We need to match this to allow the std::put_time calls to link -// (PR #99075). +// (PR #99075). clang 20 contains a fix, but the workaround is still needed +// with older versions. asm("_ZNKSt8time_putIcSt19ostreambuf_iteratorIcSt11char_traitsIcEEE3putES3_" "RSt8ios_basecPKSt2tmPKcSB_ = " "_ZNKSt8time_putIcSt19ostreambuf_iteratorIcSt11char_traitsIcEEE3putES3_" diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index efde354860de43e..c3a6e5ef8a9d44a 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -567,15 +567,9 @@ static bool checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, // * When compiling for SVE only, the caller must be in non-streaming mode. // * When compiling for both SVE and SME, the caller can be in either mode. 
if (BuiltinType == SemaARM::VerifyRuntimeMode) { - auto DisableFeatures = [](llvm::StringMap<bool> &Map, StringRef S) { - for (StringRef K : Map.keys()) - if (K.starts_with(S)) - Map[K] = false; - }; - llvm::StringMap<bool> CallerFeatureMapWithoutSVE; S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSVE, FD); - DisableFeatures(CallerFeatureMapWithoutSVE, "sve"); + CallerFeatureMapWithoutSVE["sve"] = false; // Avoid emitting diagnostics for a function that can never compile. if (FnType == SemaARM::ArmStreaming && !CallerFeatureMapWithoutSVE["sme"]) @@ -583,7 +577,7 @@ static bool checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, llvm::StringMap<bool> CallerFeatureMapWithoutSME; S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSME, FD); - DisableFeatures(CallerFeatureMapWithoutSME, "sme"); + CallerFeatureMapWithoutSME["sme"] = false; // We know the builtin requires either some combination of SVE flags, or // some combination of SME flags, but we need to figure out which part
ProcessDeclAttributes(S, NewFD, D); const auto *NewTVA = NewFD->getAttr(); - if (NewTVA && !NewTVA->isDefaultVersion() && + if (Context.getTargetInfo().getTriple().isAArch64() && NewTVA && + !NewTVA->isDefaultVersion() && !Context.getTargetInfo().hasFeature("fmv")) { // Don't add to scope fmv functions declarations if fmv disabled AddToScope = false; @@ -11038,7 +11040,16 @@ static bool CheckMultiVersionValue(Sema &S, const FunctionDecl *FD) { if (TVA) { llvm::SmallVector Feats; - TVA->getFeatures(Feats); + ParsedTargetAttr ParseInfo; + if (S.getASTContext().getTargetInfo().getTriple().isRISCV()) { + ParseInfo = + S.getASTContext().getTargetInfo().parseTargetAttr(TVA->getName()); + for (auto &Feat : ParseInfo.Features) + Feats.push_back(StringRef{Feat}.substr(1)); + } else { + assert(S.getASTContext().getTargetInfo().getTriple().isAArch64()); + TVA->getFeatures(Feats); + } for (const auto &Feat : Feats) { if (!TargetInfo.validateCpuSupports(Feat)) { S.Diag(FD->getLocation(), diag::err_bad_multiversion_option) @@ -11324,7 +11335,8 @@ static bool PreviousDeclsHaveMultiVersionAttribute(const FunctionDecl *FD) { } static void patchDefaultTargetVersion(FunctionDecl *From, FunctionDecl *To) { - if (!From->getASTContext().getTargetInfo().getTriple().isAArch64()) + if (!From->getASTContext().getTargetInfo().getTriple().isAArch64() && + !From->getASTContext().getTargetInfo().getTriple().isRISCV()) return; MultiVersionKind MVKindFrom = From->getMultiVersionKind(); @@ -15511,7 +15523,8 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D, FD->setInvalidDecl(); } if (const auto *Attr = FD->getAttr()) { - if (!Context.getTargetInfo().hasFeature("fmv") && + if (Context.getTargetInfo().getTriple().isAArch64() && + !Context.getTargetInfo().hasFeature("fmv") && !Attr->isDefaultVersion()) { // If function multi versioning disabled skip parsing function body // defined with non-default target_version attribute diff --git a/clang/lib/Sema/SemaDeclAttr.cpp 
b/clang/lib/Sema/SemaDeclAttr.cpp index c9b9f3a0007daa6..af983349a89b584 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3040,6 +3040,54 @@ bool Sema::checkTargetVersionAttr(SourceLocation LiteralLoc, Decl *D, enum SecondParam { None }; enum ThirdParam { Target, TargetClones, TargetVersion }; llvm::SmallVector Features; + if (Context.getTargetInfo().getTriple().isRISCV()) { + llvm::SmallVector AttrStrs; + AttrStr.split(AttrStrs, ';'); + + bool HasArch = false; + bool HasPriority = false; + bool HasDefault = false; + bool DuplicateAttr = false; + for (auto &AttrStr : AttrStrs) { + // Only support arch=+ext,... syntax. + if (AttrStr.starts_with("arch=+")) { + if (HasArch) + DuplicateAttr = true; + HasArch = true; + ParsedTargetAttr TargetAttr = + Context.getTargetInfo().parseTargetAttr(AttrStr); + + if (TargetAttr.Features.empty() || + llvm::any_of(TargetAttr.Features, [&](const StringRef Ext) { + return !RISCV().isValidFMVExtension(Ext); + })) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << AttrStr << TargetVersion; + } else if (AttrStr.starts_with("default")) { + if (HasDefault) + DuplicateAttr = true; + HasDefault = true; + } else if (AttrStr.consume_front("priority=")) { + if (HasPriority) + DuplicateAttr = true; + HasPriority = true; + int Digit; + if (AttrStr.getAsInteger(0, Digit)) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << AttrStr << TargetVersion; + } else { + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << AttrStr << TargetVersion; + } + } + + if (((HasPriority || HasArch) && HasDefault) || DuplicateAttr || + (HasPriority && !HasArch)) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << AttrStr << TargetVersion; + + return false; + } AttrStr.split(Features, "+"); for (auto &CurFeature : Features) { CurFeature = CurFeature.trim(); diff 
--git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 959f0739f03fb9d..f930a21ea870ec9 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -20223,6 +20223,8 @@ void Sema::DiagnoseEqualityWithExtraParens(ParenExpr *ParenE) { return; Expr *E = ParenE->IgnoreParens(); + if (ParenE->isProducedByFoldExpansion() && ParenE->getSubExpr() == E) + return; if (BinaryOperator *opE = dyn_cast(E)) if (opE->getOpcode() == BO_EQ && diff --git a/clang/lib/Sema/SemaFunctionEffects.cpp b/clang/lib/Sema/SemaFunctionEffects.cpp index 0fb18d207a50ba7..0ac5de29f66aa75 100644 --- a/clang/lib/Sema/SemaFunctionEffects.cpp +++ b/clang/lib/Sema/SemaFunctionEffects.cpp @@ -1048,15 +1048,14 @@ class Analyzer { } void checkIndirectCall(CallExpr *Call, QualType CalleeType) { - auto *FPT = - CalleeType->getAs(); // Null if FunctionType. FunctionEffectKindSet CalleeEffects; - if (FPT) - CalleeEffects.insert(FPT->getFunctionEffects()); + if (FunctionEffectsRef Effects = FunctionEffectsRef::get(CalleeType); + !Effects.empty()) + CalleeEffects.insert(Effects); auto Check1Effect = [&](FunctionEffect Effect, bool Inferring) { - if (FPT == nullptr || Effect.shouldDiagnoseFunctionCall( - /*direct=*/false, CalleeEffects)) + if (Effect.shouldDiagnoseFunctionCall( + /*direct=*/false, CalleeEffects)) addViolation(Inferring, Effect, ViolationID::CallsExprWithoutEffect, Call->getBeginLoc()); }; diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 898255ff7c6a32a..4ce47d8c1ee7614 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -4206,18 +4206,14 @@ Sema::InstantiateClassMembers(SourceLocation PointOfInstantiation, if (Function->hasAttr()) continue; - MemberSpecializationInfo *MSInfo = - Function->getMemberSpecializationInfo(); - assert(MSInfo && "No member specialization information?"); - if (MSInfo->getTemplateSpecializationKind() - == 
TSK_ExplicitSpecialization) + TemplateSpecializationKind PrevTSK = + Function->getTemplateSpecializationKind(); + if (PrevTSK == TSK_ExplicitSpecialization) continue; - if (CheckSpecializationInstantiationRedecl(PointOfInstantiation, TSK, - Function, - MSInfo->getTemplateSpecializationKind(), - MSInfo->getPointOfInstantiation(), - SuppressNew) || + if (CheckSpecializationInstantiationRedecl( + PointOfInstantiation, TSK, Function, PrevTSK, + Function->getPointOfInstantiation(), SuppressNew) || SuppressNew) continue; diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index ed9412c93c62b38..01c086a602dd5ad 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -15661,12 +15661,14 @@ TreeTransform<Derived>::TransformCXXFoldExpr(CXXFoldExpr *E) { return true; } + if (ParenExpr *PE = dyn_cast_or_null<ParenExpr>(Result.get())) + PE->setIsProducedByFoldExpansion(); + // If we had no init and an empty pack, and we're not retaining an expansion, // then produce a fallback value or error.
if (Result.isUnset()) return getDerived().RebuildEmptyCXXFoldExpr(E->getEllipsisLoc(), E->getOperator()); - return Result; } diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 2038fe7829af9b4..6aaafb2e8d71ccc 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -706,6 +706,7 @@ void ASTStmtReader::VisitCharacterLiteral(CharacterLiteral *E) { void ASTStmtReader::VisitParenExpr(ParenExpr *E) { VisitExpr(E); + E->setIsProducedByFoldExpansion(Record.readInt()); E->setLParen(readSourceLocation()); E->setRParen(readSourceLocation()); E->setSubExpr(Record.readSubExpr()); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 64e4894dc29fb73..321e0031661ee26 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -786,6 +786,7 @@ void ASTStmtWriter::VisitCharacterLiteral(CharacterLiteral *E) { void ASTStmtWriter::VisitParenExpr(ParenExpr *E) { VisitExpr(E); + Record.push_back(E->isProducedByFoldExpansion()); Record.AddSourceLocation(E->getLParen()); Record.AddSourceLocation(E->getRParen()); Record.AddStmt(E->getSubExpr()); diff --git a/clang/test/AST/solaris-tm.cpp b/clang/test/AST/solaris-tm.cpp new file mode 100644 index 000000000000000..e559ece1429af84 --- /dev/null +++ b/clang/test/AST/solaris-tm.cpp @@ -0,0 +1,34 @@ +/// Check that std::tm and a few others are mangled as tm on Solaris only. +/// Issue #33114. 
+/// +// RUN: %clang_cc1 -emit-llvm %s -o - -triple amd64-pc-solaris2.11 | FileCheck --check-prefix=CHECK-SOLARIS %s +// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-unknown-linux-gnu | FileCheck --check-prefix=CHECK-LINUX %s +// +// REQUIRES: x86-registered-target + +namespace std { + extern "C" { + struct tm { + int tm_sec; + }; + struct ldiv_t { + long quot; + }; + } +} + +// CHECK-SOLARIS: @_Z6tmfunc2tm +// CHECK-SOLARIS: @_Z9tmccpfunc2tmPKcS1_ +// CHECK-SOLARIS: @_Z7tm2func2tmS_ +// CHECK-LINUX: @_Z6tmfuncSt2tm +// CHECK-LINUX: @_Z9tmccpfuncSt2tmPKcS1_ +// CHECK-LINUX: @_Z7tm2funcSt2tmS_ + +void tmfunc (std::tm tm) {} +void tmccpfunc (std::tm tm, const char *ccp, const char *ccp2) {} +void tm2func (std::tm tm, std::tm tm2) {} + +// CHECK-SOLARIS: @_Z7ldtfunc6ldiv_t +// CHECK-LINUX: @_Z7ldtfuncSt6ldiv_t + +void ldtfunc (std::ldiv_t ldt) {} diff --git a/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c index 570c9c942cca63d..c81282b0de8e7e5 100644 --- a/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c +++ b/clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c @@ -1,5 +1,7 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=maytrap -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=maytrap -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=maytrap -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=maytrap -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ 
-flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=maytrap -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=maytrap -o - -Wall -Werror | FileCheck %s // Test that the constrained intrinsics are picking up the exception // metadata from the AST instead of the global default from the command line. diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 4bf1213d9fca97a..9d6c1897f540d38 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1,13 +1,18 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 -// RUN: %clang_cc1 -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c 
-flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 #include -// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +// NOTE: This should match the tests 
in llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll __m256d test_mm256_add_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_add_pd @@ -23,13 +28,13 @@ __m256 test_mm256_add_ps(__m256 A, __m256 B) { __m256d test_mm256_addsub_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_addsub_pd - // CHECK: call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_addsub_pd(A, B); } __m256 test_mm256_addsub_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_addsub_ps - // CHECK: call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_addsub_ps(A, B); } @@ -73,13 +78,13 @@ __m256 test_mm256_blend_ps(__m256 A, __m256 B) { __m256d test_mm256_blendv_pd(__m256d V1, __m256d V2, __m256d V3) { // CHECK-LABEL: test_mm256_blendv_pd - // CHECK: call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_blendv_pd(V1, V2, V3); } __m256 test_mm256_blendv_ps(__m256 V1, __m256 V2, __m256 V3) { // CHECK-LABEL: test_mm256_blendv_ps - // CHECK: call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_blendv_ps(V1, V2, V3); } @@ -202,13 +207,13 @@ __m128i test_mm256_castsi256_si128(__m256i A) { __m256d test_mm256_ceil_pd(__m256d x) { // CHECK-LABEL: test_mm256_ceil_pd - // CHECK: call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 2) + // CHECK: call {{.*}}<4 x double> 
@llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 2) return _mm256_ceil_pd(x); } __m256 test_mm_ceil_ps(__m256 x) { // CHECK-LABEL: test_mm_ceil_ps - // CHECK: call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 2) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 2) return _mm256_ceil_ps(x); } @@ -886,13 +891,13 @@ __m128 test_mm_cmp_ps_true_us(__m128 a, __m128 b) { __m128d test_mm_cmp_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_cmp_sd - // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 13) + // CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 13) return _mm_cmp_sd(A, B, _CMP_GE_OS); } __m128 test_mm_cmp_ss(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_cmp_ss - // CHECK: call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 13) + // CHECK: call {{.*}}<4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 13) return _mm_cmp_ss(A, B, _CMP_GE_OS); } @@ -916,7 +921,7 @@ __m128i test_mm256_cvtpd_epi32(__m256d A) { __m128 test_mm256_cvtpd_ps(__m256d A) { // CHECK-LABEL: test_mm256_cvtpd_ps - // CHECK: call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %{{.*}}) return _mm256_cvtpd_ps(A); } @@ -958,7 +963,7 @@ __m256 test_mm256_div_ps(__m256 A, __m256 B) { __m256 test_mm256_dp_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_dp_ps - // CHECK: call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> {{.*}}, <8 x float> {{.*}}, i8 7) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> {{.*}}, <8 x float> {{.*}}, i8 7) return _mm256_dp_ps(A, B, 7); } @@ -1010,37 +1015,37 @@ __m128i test_mm256_extractf128_si256(__m256i A) { __m256d test_mm256_floor_pd(__m256d x) { // CHECK-LABEL: test_mm256_floor_pd - // CHECK: call <4 x double> 
@llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 1) + // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 1) return _mm256_floor_pd(x); } __m256 test_mm_floor_ps(__m256 x) { // CHECK-LABEL: test_mm_floor_ps - // CHECK: call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 1) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 1) return _mm256_floor_ps(x); } __m256d test_mm256_hadd_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_hadd_pd - // CHECK: call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_hadd_pd(A, B); } __m256 test_mm256_hadd_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_hadd_ps - // CHECK: call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_hadd_ps(A, B); } __m256d test_mm256_hsub_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_hsub_pd - // CHECK: call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_hsub_pd(A, B); } __m256 test_mm256_hsub_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_hsub_ps - // CHECK: call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_hsub_ps(A, B); } @@ -1159,25 +1164,25 @@ __m256i test_mm256_loadu2_m128i(__m128i* A, __m128i* B) { __m128d test_mm_maskload_pd(double* A, __m128i B) { // CHECK-LABEL: test_mm_maskload_pd - // CHECK: call <2 x double> @llvm.x86.avx.maskload.pd(ptr %{{.*}}, <2 x i64> %{{.*}}) + // CHECK: call 
{{.*}}<2 x double> @llvm.x86.avx.maskload.pd(ptr %{{.*}}, <2 x i64> %{{.*}}) return _mm_maskload_pd(A, B); } __m256d test_mm256_maskload_pd(double* A, __m256i B) { // CHECK-LABEL: test_mm256_maskload_pd - // CHECK: call <4 x double> @llvm.x86.avx.maskload.pd.256(ptr %{{.*}}, <4 x i64> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.maskload.pd.256(ptr %{{.*}}, <4 x i64> %{{.*}}) return _mm256_maskload_pd(A, B); } __m128 test_mm_maskload_ps(float* A, __m128i B) { // CHECK-LABEL: test_mm_maskload_ps - // CHECK: call <4 x float> @llvm.x86.avx.maskload.ps(ptr %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.x86.avx.maskload.ps(ptr %{{.*}}, <4 x i32> %{{.*}}) return _mm_maskload_ps(A, B); } __m256 test_mm256_maskload_ps(float* A, __m256i B) { // CHECK-LABEL: test_mm256_maskload_ps - // CHECK: call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.maskload.ps.256(ptr %{{.*}}, <8 x i32> %{{.*}}) return _mm256_maskload_ps(A, B); } @@ -1207,25 +1212,25 @@ void test_mm256_maskstore_ps(float* A, __m256i B, __m256 C) { __m256d test_mm256_max_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_max_pd - // CHECK: call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_max_pd(A, B); } __m256 test_mm256_max_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_max_ps - // CHECK: call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_max_ps(A, B); } __m256d test_mm256_min_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_min_pd - // CHECK: call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> 
@llvm.x86.avx.min.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_min_pd(A, B); } __m256 test_mm256_min_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_min_ps - // CHECK: call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_min_ps(A, B); } @@ -1249,13 +1254,13 @@ __m256 test_mm256_moveldup_ps(__m256 A) { int test_mm256_movemask_pd(__m256d A) { // CHECK-LABEL: test_mm256_movemask_pd - // CHECK: call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %{{.*}}) return _mm256_movemask_pd(A); } int test_mm256_movemask_ps(__m256 A) { // CHECK-LABEL: test_mm256_movemask_ps - // CHECK: call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %{{.*}}) return _mm256_movemask_ps(A); } @@ -1334,49 +1339,49 @@ __m256i test_mm256_permute2f128_si256(__m256i A, __m256i B) { __m128d test_mm_permutevar_pd(__m128d A, __m128i B) { // CHECK-LABEL: test_mm_permutevar_pd - // CHECK: call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %{{.*}}, <2 x i64> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %{{.*}}, <2 x i64> %{{.*}}) return _mm_permutevar_pd(A, B); } __m256d test_mm256_permutevar_pd(__m256d A, __m256i B) { // CHECK-LABEL: test_mm256_permutevar_pd - // CHECK: call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %{{.*}}, <4 x i64> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_permutevar_pd(A, B); } __m128 test_mm_permutevar_ps(__m128 A, __m128i B) { // CHECK-LABEL: test_mm_permutevar_ps - // CHECK: call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %{{.*}}, <4 x i32> %{{.*}}) + // CHECK: call {{.*}}<4 x float> 
@llvm.x86.avx.vpermilvar.ps(<4 x float> %{{.*}}, <4 x i32> %{{.*}}) return _mm_permutevar_ps(A, B); } __m256 test_mm256_permutevar_ps(__m256 A, __m256i B) { // CHECK-LABEL: test_mm256_permutevar_ps - // CHECK: call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_permutevar_ps(A, B); } __m256 test_mm256_rcp_ps(__m256 A) { // CHECK-LABEL: test_mm256_rcp_ps - // CHECK: call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %{{.*}}) return _mm256_rcp_ps(A); } __m256d test_mm256_round_pd(__m256d x) { // CHECK-LABEL: test_mm256_round_pd - // CHECK: call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 4) + // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 4) return _mm256_round_pd(x, 4); } __m256 test_mm256_round_ps(__m256 x) { // CHECK-LABEL: test_mm256_round_ps - // CHECK: call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 4) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 4) return _mm256_round_ps(x, 4); } __m256 test_mm256_rsqrt_ps(__m256 A) { // CHECK-LABEL: test_mm256_rsqrt_ps - // CHECK: call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %{{.*}}) return _mm256_rsqrt_ps(A); } @@ -1762,13 +1767,13 @@ __m256 test_mm256_shuffle_ps(__m256 A, __m256 B) { __m256d test_mm256_sqrt_pd(__m256d A) { // CHECK-LABEL: test_mm256_sqrt_pd - // CHECK: call <4 x double> @llvm.sqrt.v4f64(<4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.sqrt.v4f64(<4 x double> %{{.*}}) return _mm256_sqrt_pd(A); } __m256 test_mm256_sqrt_ps(__m256 A) { // CHECK-LABEL: test_mm256_sqrt_ps - // CHECK: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.*}}) + // 
CHECK: call {{.*}}<8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.*}}) return _mm256_sqrt_ps(A); } @@ -1888,91 +1893,91 @@ __m256 test_mm256_sub_ps(__m256 A, __m256 B) { int test_mm_testc_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_testc_pd - // CHECK: call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_testc_pd(A, B); } int test_mm256_testc_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_testc_pd - // CHECK: call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_testc_pd(A, B); } int test_mm_testc_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_testc_ps - // CHECK: call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_testc_ps(A, B); } int test_mm256_testc_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_testc_ps - // CHECK: call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_testc_ps(A, B); } int test_mm256_testc_si256(__m256i A, __m256i B) { // CHECK-LABEL: test_mm256_testc_si256 - // CHECK: call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.ptestc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_testc_si256(A, B); } int test_mm_testnzc_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_testnzc_pd - // CHECK: call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_testnzc_pd(A, B); } int test_mm256_testnzc_pd(__m256d A, __m256d 
B) { // CHECK-LABEL: test_mm256_testnzc_pd - // CHECK: call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_testnzc_pd(A, B); } int test_mm_testnzc_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_testnzc_ps - // CHECK: call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_testnzc_ps(A, B); } int test_mm256_testnzc_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_testnzc_ps - // CHECK: call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_testnzc_ps(A, B); } int test_mm256_testnzc_si256(__m256i A, __m256i B) { // CHECK-LABEL: test_mm256_testnzc_si256 - // CHECK: call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_testnzc_si256(A, B); } int test_mm_testz_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_testz_pd - // CHECK: call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_testz_pd(A, B); } int test_mm256_testz_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_testz_pd - // CHECK: call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_testz_pd(A, B); } int test_mm_testz_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_testz_ps - // CHECK: call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}i32 
@llvm.x86.avx.vtestz.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_testz_ps(A, B); } int test_mm256_testz_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_testz_ps - // CHECK: call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_testz_ps(A, B); } int test_mm256_testz_si256(__m256i A, __m256i B) { // CHECK-LABEL: test_mm256_testz_si256 - // CHECK: call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx.ptestz.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_testz_si256(A, B); } @@ -2092,3 +2097,19 @@ float test_mm256_cvtss_f32(__m256 __a) // CHECK: extractelement <8 x float> %{{.*}}, i32 0 return _mm256_cvtss_f32(__a); } + +// Test constexpr handling. +#if defined(__cplusplus) && (__cplusplus >= 201103L) + +void test_constexpr() { + constexpr __m256d v_mm256_setzero_pd = _mm256_setzero_pd(); + static_assert(v_mm256_setzero_pd[0] == +0.0 && v_mm256_setzero_pd[1] == +0.0 && v_mm256_setzero_pd[2] == +0.0 && v_mm256_setzero_pd[3] == +0.0); + + constexpr __m256 v_mm256_setzero_ps = _mm256_setzero_ps(); + static_assert(v_mm256_setzero_ps[0] == +0.0f && v_mm256_setzero_ps[1] == +0.0f && v_mm256_setzero_ps[2] == +0.0f && v_mm256_setzero_ps[3] == +0.0f && v_mm256_setzero_ps[4] == +0.0f && v_mm256_setzero_ps[5] == +0.0f && v_mm256_setzero_ps[6] == +0.0f && v_mm256_setzero_ps[7] == +0.0f); + + constexpr __m256i v_mm256_setzero_si256 = _mm256_setzero_si256(); + static_assert(v_mm256_setzero_si256[0] == 0x0000000000000000ULL && v_mm256_setzero_si256[1] == 0x0000000000000000ULL && v_mm256_setzero_si256[2] == 0x0000000000000000ULL && v_mm256_setzero_si256[3] == 0x0000000000000000ULL); +} + +#endif diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index fd72e25afdb45cc..a4a3a08efad51bf 100644 --- 
a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1,7 +1,11 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86 #include @@ -457,28 +461,28 @@ __m256i test_mm256_mask_i32gather_epi32(__m256i a, int const *b, __m256i c, __m2 __m128i test_mm_i32gather_epi64(long long const *b, __m128i c) { // CHECK-LABEL: test_mm_i32gather_epi64 - // CHECK: call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, ptr %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2) + // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, ptr %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2) return _mm_i32gather_epi64(b, c, 2); } __m128i test_mm_mask_i32gather_epi64(__m128i a, long long const *b, __m128i c, __m128i d) { // CHECK-LABEL: test_mm_mask_i32gather_epi64 - // CHECK: call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %{{.*}}, ptr %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2) + // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %{{.*}}, ptr %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2) return _mm_mask_i32gather_epi64(a, b, c, d, 2); } __m256i test_mm256_i32gather_epi64(long long const *b, __m128i c) { // X64-LABEL: test_mm256_i32gather_epi64 - // X64: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, ptr %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2) + // X64: call {{.*}}<4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, ptr %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2) // // X86-LABEL: test_mm256_i32gather_epi64 - // X86: call <4 x i64> 
@llvm.x86.avx2.gather.d.q.256(<4 x i64> %{{.*}}, ptr %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2) + // X86: call {{.*}}<4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %{{.*}}, ptr %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2) return _mm256_i32gather_epi64(b, c, 2); } __m256i test_mm256_mask_i32gather_epi64(__m256i a, long long const *b, __m128i c, __m256i d) { // CHECK-LABEL: test_mm256_mask_i32gather_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %{{.*}}, ptr %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2) + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %{{.*}}, ptr %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2) return _mm256_mask_i32gather_epi64(a, b, c, d, 2); } @@ -592,28 +596,28 @@ __m128i test_mm256_mask_i64gather_epi32(__m128i a, int const *b, __m256i c, __m1 __m128i test_mm_i64gather_epi64(long long const *b, __m128i c) { // CHECK-LABEL: test_mm_i64gather_epi64 - // CHECK: call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, ptr %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2) + // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, ptr %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2) return _mm_i64gather_epi64(b, c, 2); } __m128i test_mm_mask_i64gather_epi64(__m128i a, long long const *b, __m128i c, __m128i d) { // CHECK-LABEL: test_mm_mask_i64gather_epi64 - // CHECK: call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %{{.*}}, ptr %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2) + // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %{{.*}}, ptr %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2) return _mm_mask_i64gather_epi64(a, b, c, d, 2); } __m256i test_mm256_i64gather_epi64(long long const *b, __m256i c) { // X64-LABEL: test_mm256_i64gather_epi64 - // X64: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, ptr %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2) + // X64: call 
{{.*}}<4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, ptr %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2) // // X86-LABEL: test_mm256_i64gather_epi64 - // X86: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %{{.*}}, ptr %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2) + // X86: call {{.*}}<4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %{{.*}}, ptr %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2) return _mm256_i64gather_epi64(b, c, 2); } __m256i test_mm256_mask_i64gather_epi64(__m256i a, long long const *b, __m256i c, __m256i d) { // CHECK-LABEL: test_mm256_mask_i64gather_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %{{.*}}, ptr %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2) + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %{{.*}}, ptr %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2) return _mm256_mask_i64gather_epi64(a, b, c, d, 2); } @@ -745,13 +749,13 @@ __m256i test_mm256_maskload_epi32(int const *a, __m256i m) { __m128i test_mm_maskload_epi64(long long const *a, __m128i m) { // CHECK-LABEL: test_mm_maskload_epi64 - // CHECK: call <2 x i64> @llvm.x86.avx2.maskload.q(ptr %{{.*}}, <2 x i64> %{{.*}}) + // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.maskload.q(ptr %{{.*}}, <2 x i64> %{{.*}}) return _mm_maskload_epi64(a, m); } __m256i test_mm256_maskload_epi64(long long const *a, __m256i m) { // CHECK-LABEL: test_mm256_maskload_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr %{{.*}}, <4 x i64> %{{.*}}) + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.maskload.q.256(ptr %{{.*}}, <4 x i64> %{{.*}}) return _mm256_maskload_epi64(a, m); } @@ -853,7 +857,7 @@ __m256i test_mm256_min_epu32(__m256i a, __m256i b) { int test_mm256_movemask_epi8(__m256i a) { // CHECK-LABEL: test_mm256_movemask_epi8 - // CHECK: call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %{{.*}}) return 
_mm256_movemask_epi8(a); } @@ -967,13 +971,13 @@ __m256i test_mm256_permutevar8x32_epi32(__m256i a, __m256i b) { __m256 test_mm256_permutevar8x32_ps(__m256 a, __m256i b) { // CHECK-LABEL: test_mm256_permutevar8x32_ps - // CHECK: call <8 x float> @llvm.x86.avx2.permps(<8 x float> %{{.*}}, <8 x i32> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.avx2.permps(<8 x float> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_permutevar8x32_ps(a, b); } __m256i test_mm256_sad_epu8(__m256i x, __m256i y) { // CHECK-LABEL: test_mm256_sad_epu8 - // CHECK: call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_sad_epu8(x, y); } @@ -1045,13 +1049,13 @@ __m256i test_mm256_slli_epi32_2(__m256i a, int b) { __m256i test_mm256_slli_epi64(__m256i a) { // CHECK-LABEL: test_mm256_slli_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %{{.*}}, i32 %{{.*}}) + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %{{.*}}, i32 %{{.*}}) return _mm256_slli_epi64(a, 3); } __m256i test_mm256_slli_epi64_2(__m256i a, int b) { // CHECK-LABEL: test_mm256_slli_epi64_2 - // CHECK: call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %{{.*}}, i32 %{{.*}}) + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %{{.*}}, i32 %{{.*}}) return _mm256_slli_epi64(a, b); } @@ -1075,13 +1079,13 @@ __m256i test_mm256_sllv_epi32(__m256i a, __m256i b) { __m128i test_mm_sllv_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_sllv_epi64 - // CHECK: call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_sllv_epi64(a, b); } __m256i test_mm256_sllv_epi64(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sllv_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // CHECK: call {{.*}}<4 x i64> 
@llvm.x86.avx2.psllv.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_sllv_epi64(a, b); } @@ -1147,7 +1151,7 @@ __m256i test_mm256_srl_epi32(__m256i a, __m128i b) { __m256i test_mm256_srl_epi64(__m256i a, __m128i b) { // CHECK-LABEL: test_mm256_srl_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %{{.*}}, <2 x i64> %{{.*}}) + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm256_srl_epi64(a, b); } @@ -1177,13 +1181,13 @@ __m256i test_mm256_srli_epi32_2(__m256i a, int b) { __m256i test_mm256_srli_epi64(__m256i a) { // CHECK-LABEL: test_mm256_srli_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %{{.*}}, i32 %{{.*}}) + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %{{.*}}, i32 %{{.*}}) return _mm256_srli_epi64(a, 3); } __m256i test_mm256_srli_epi64_2(__m256i a, int b) { // CHECK-LABEL: test_mm256_srli_epi64_2 - // CHECK: call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %{{.*}}, i32 %{{.*}}) + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %{{.*}}, i32 %{{.*}}) return _mm256_srli_epi64(a, b); } @@ -1207,13 +1211,13 @@ __m256i test_mm256_srlv_epi32(__m256i a, __m256i b) { __m128i test_mm_srlv_epi64(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_srlv_epi64 - // CHECK: call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_srlv_epi64(a, b); } __m256i test_mm256_srlv_epi64(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_srlv_epi64 - // CHECK: call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_srlv_epi64(a, b); } diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 0e3463849951ed4..0b4f778a0637ab7 100644 --- 
a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -1,19 +1,21 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s -// RUN: %clang_cc1 -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s #include __m512d test_mm512_sqrt_pd(__m512d a) { - // CHECK-LABEL: @test_mm512_sqrt_pd - // CHECK: call <8 x double> @llvm.sqrt.v8f64(<8 x double> %{{.*}}) + // CHECK-LABEL: test_mm512_sqrt_pd + // CHECK: call {{.*}}<8 x double> @llvm.sqrt.v8f64(<8 x double> %{{.*}}) return _mm512_sqrt_pd(a); } __m512d test_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_sqrt_pd - // CHECK: call <8 x double> @llvm.sqrt.v8f64(<8 x double> %{{.*}}) + // CHECK-LABEL: test_mm512_mask_sqrt_pd + // CHECK: call {{.*}}<8 x double> @llvm.sqrt.v8f64(<8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: 
select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_sqrt_pd (__W,__U,__A); @@ -21,8 +23,8 @@ __m512d test_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) __m512d test_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_sqrt_pd - // CHECK: call <8 x double> @llvm.sqrt.v8f64(<8 x double> %{{.*}}) + // CHECK-LABEL: test_mm512_maskz_sqrt_pd + // CHECK: call {{.*}}<8 x double> @llvm.sqrt.v8f64(<8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> {{.*}} return _mm512_maskz_sqrt_pd (__U,__A); @@ -30,8 +32,8 @@ __m512d test_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) __m512d test_mm512_mask_sqrt_round_pd(__m512d __W,__mmask8 __U,__m512d __A) { - // CHECK-LABEL: @test_mm512_mask_sqrt_round_pd - // CHECK: call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %{{.*}}, i32 11) + // CHECK-LABEL: test_mm512_mask_sqrt_round_pd + // CHECK: call {{.*}}<8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %{{.*}}, i32 11) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_sqrt_round_pd(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); @@ -39,8 +41,8 @@ __m512d test_mm512_mask_sqrt_round_pd(__m512d __W,__mmask8 __U,__m512d __A) __m512d test_mm512_maskz_sqrt_round_pd(__mmask8 __U,__m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_sqrt_round_pd - // CHECK: call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %{{.*}}, i32 11) + // CHECK-LABEL: test_mm512_maskz_sqrt_round_pd + // CHECK: call {{.*}}<8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %{{.*}}, i32 11) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> {{.*}} return _mm512_maskz_sqrt_round_pd(__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); @@ -48,22 +50,22 @@ __m512d 
test_mm512_maskz_sqrt_round_pd(__mmask8 __U,__m512d __A) __m512d test_mm512_sqrt_round_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_sqrt_round_pd - // CHECK: call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %{{.*}}, i32 11) + // CHECK-LABEL: test_mm512_sqrt_round_pd + // CHECK: call {{.*}}<8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %{{.*}}, i32 11) return _mm512_sqrt_round_pd(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_sqrt_ps(__m512 a) { - // CHECK-LABEL: @test_mm512_sqrt_ps - // CHECK: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.*}}) + // CHECK-LABEL: test_mm512_sqrt_ps + // CHECK: call {{.*}}<16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.*}}) return _mm512_sqrt_ps(a); } __m512 test_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_sqrt_ps - // CHECK: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.*}}) + // CHECK-LABEL: test_mm512_mask_sqrt_ps + // CHECK: call {{.*}}<16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_sqrt_ps( __W, __U, __A); @@ -71,8 +73,8 @@ __m512 test_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) __m512 test_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_sqrt_ps - // CHECK: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.*}}) + // CHECK-LABEL: test_mm512_maskz_sqrt_ps + // CHECK: call {{.*}}<16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}} return _mm512_maskz_sqrt_ps(__U ,__A); @@ -80,8 +82,8 @@ __m512 test_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) __m512 test_mm512_mask_sqrt_round_ps(__m512 __W,__mmask16 __U,__m512 __A) { - // CHECK-LABEL: @test_mm512_mask_sqrt_round_ps - // CHECK: call <16 x float> 
@llvm.x86.avx512.sqrt.ps.512(<16 x float> %{{.*}}, i32 11) + // CHECK-LABEL: test_mm512_mask_sqrt_round_ps + // CHECK: call {{.*}}<16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %{{.*}}, i32 11) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_sqrt_round_ps(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); @@ -89,8 +91,8 @@ __m512 test_mm512_mask_sqrt_round_ps(__m512 __W,__mmask16 __U,__m512 __A) __m512 test_mm512_maskz_sqrt_round_ps(__mmask16 __U,__m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_sqrt_round_ps - // CHECK: call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %{{.*}}, i32 11) + // CHECK-LABEL: test_mm512_maskz_sqrt_round_ps + // CHECK: call {{.*}}<16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %{{.*}}, i32 11) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}} return _mm512_maskz_sqrt_round_ps(__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); @@ -98,84 +100,84 @@ __m512 test_mm512_maskz_sqrt_round_ps(__mmask16 __U,__m512 __A) __m512 test_mm512_sqrt_round_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_sqrt_round_ps - // CHECK: call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %{{.*}}, i32 11) + // CHECK-LABEL: test_mm512_sqrt_round_ps + // CHECK: call {{.*}}<16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %{{.*}}, i32 11) return _mm512_sqrt_round_ps(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_rsqrt14_pd(__m512d a) { - // CHECK-LABEL: @test_mm512_rsqrt14_pd + // CHECK-LABEL: test_mm512_rsqrt14_pd // CHECK: @llvm.x86.avx512.rsqrt14.pd.512 return _mm512_rsqrt14_pd(a); } __m512d test_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_rsqrt14_pd + // CHECK-LABEL: test_mm512_mask_rsqrt14_pd // CHECK: @llvm.x86.avx512.rsqrt14.pd.512 return _mm512_mask_rsqrt14_pd (__W,__U,__A); } __m512d 
test_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_rsqrt14_pd + // CHECK-LABEL: test_mm512_maskz_rsqrt14_pd // CHECK: @llvm.x86.avx512.rsqrt14.pd.512 return _mm512_maskz_rsqrt14_pd (__U,__A); } __m512 test_mm512_rsqrt14_ps(__m512 a) { - // CHECK-LABEL: @test_mm512_rsqrt14_ps + // CHECK-LABEL: test_mm512_rsqrt14_ps // CHECK: @llvm.x86.avx512.rsqrt14.ps.512 return _mm512_rsqrt14_ps(a); } __m512 test_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_rsqrt14_ps + // CHECK-LABEL: test_mm512_mask_rsqrt14_ps // CHECK: @llvm.x86.avx512.rsqrt14.ps.512 return _mm512_mask_rsqrt14_ps (__W,__U,__A); } __m512 test_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_rsqrt14_ps + // CHECK-LABEL: test_mm512_maskz_rsqrt14_ps // CHECK: @llvm.x86.avx512.rsqrt14.ps.512 return _mm512_maskz_rsqrt14_ps (__U,__A); } __m512 test_mm512_add_ps(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_add_ps + // CHECK-LABEL: test_mm512_add_ps // CHECK: fadd <16 x float> return _mm512_add_ps(a, b); } __m512d test_mm512_add_pd(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_add_pd + // CHECK-LABEL: test_mm512_add_pd // CHECK: fadd <8 x double> return _mm512_add_pd(a, b); } __m512 test_mm512_mul_ps(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mul_ps + // CHECK-LABEL: test_mm512_mul_ps // CHECK: fmul <16 x float> return _mm512_mul_ps(a, b); } __m512d test_mm512_mul_pd(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mul_pd + // CHECK-LABEL: test_mm512_mul_pd // CHECK: fmul <8 x double> return _mm512_mul_pd(a, b); } void test_mm512_storeu_si512 (void *__P, __m512i __A) { - // CHECK-LABEL: @test_mm512_storeu_si512 + // CHECK-LABEL: test_mm512_storeu_si512 // CHECK: store <8 x i64> %{{.*}}, ptr %{{.*}}, align 1{{$}} // CHECK-NEXT: ret void _mm512_storeu_si512 ( __P,__A); @@ -183,7 +185,7 @@ void test_mm512_storeu_si512 (void *__P, __m512i __A) void 
test_mm512_storeu_ps(void *p, __m512 a) { - // CHECK-LABEL: @test_mm512_storeu_ps + // CHECK-LABEL: test_mm512_storeu_ps // CHECK: store <16 x float> %{{.*}}, ptr %{{.*}}, align 1{{$}} // CHECK-NEXT: ret void _mm512_storeu_ps(p, a); @@ -191,7 +193,7 @@ void test_mm512_storeu_ps(void *p, __m512 a) void test_mm512_storeu_pd(void *p, __m512d a) { - // CHECK-LABEL: @test_mm512_storeu_pd + // CHECK-LABEL: test_mm512_storeu_pd // CHECK: store <8 x double> %{{.*}}, ptr %{{.*}}, align 1{{$}} // CHECK-NEXT: ret void _mm512_storeu_pd(p, a); @@ -199,14 +201,14 @@ void test_mm512_storeu_pd(void *p, __m512d a) void test_mm512_mask_store_ps(void *p, __m512 a, __mmask16 m) { - // CHECK-LABEL: @test_mm512_mask_store_ps + // CHECK-LABEL: test_mm512_mask_store_ps // CHECK: @llvm.masked.store.v16f32.p0(<16 x float> %{{.*}}, ptr %{{.*}}, i32 64, <16 x i1> %{{.*}}) _mm512_mask_store_ps(p, m, a); } void test_mm512_store_si512 (void *__P, __m512i __A) { - // CHECK-LABEL: @test_mm512_store_si512 + // CHECK-LABEL: test_mm512_store_si512 // CHECK: load <8 x i64>, ptr %__A.addr.i, align 64{{$}} // CHECK: [[SI512_3:%.+]] = load ptr, ptr %__P.addr.i, align 8{{$}} // CHECK: store <8 x i64> @@ -215,7 +217,7 @@ void test_mm512_store_si512 (void *__P, __m512i __A) void test_mm512_store_epi32 (void *__P, __m512i __A) { - // CHECK-LABEL: @test_mm512_store_epi32 + // CHECK-LABEL: test_mm512_store_epi32 // CHECK: load <8 x i64>, ptr %__A.addr.i, align 64{{$}} // CHECK: [[Si32_3:%.+]] = load ptr, ptr %__P.addr.i, align 8{{$}} // CHECK: store <8 x i64> @@ -224,7 +226,7 @@ void test_mm512_store_epi32 (void *__P, __m512i __A) void test_mm512_store_epi64 (void *__P, __m512i __A) { - // CHECK-LABEL: @test_mm512_store_epi64 + // CHECK-LABEL: test_mm512_store_epi64 // CHECK: load <8 x i64>, ptr %__A.addr.i, align 64{{$}} // CHECK: [[SI64_3:%.+]] = load ptr, ptr %__P.addr.i, align 8{{$}} // CHECK: store <8 x i64> @@ -233,129 +235,129 @@ void test_mm512_store_epi64 (void *__P, __m512i __A) void 
test_mm512_store_ps(void *p, __m512 a) { - // CHECK-LABEL: @test_mm512_store_ps + // CHECK-LABEL: test_mm512_store_ps // CHECK: store <16 x float> _mm512_store_ps(p, a); } void test_mm512_store_pd(void *p, __m512d a) { - // CHECK-LABEL: @test_mm512_store_pd + // CHECK-LABEL: test_mm512_store_pd // CHECK: store <8 x double> _mm512_store_pd(p, a); } void test_mm512_mask_store_pd(void *p, __m512d a, __mmask8 m) { - // CHECK-LABEL: @test_mm512_mask_store_pd + // CHECK-LABEL: test_mm512_mask_store_pd // CHECK: @llvm.masked.store.v8f64.p0(<8 x double> %{{.*}}, ptr %{{.*}}, i32 64, <8 x i1> %{{.*}}) _mm512_mask_store_pd(p, m, a); } void test_mm512_storeu_epi32(void *__P, __m512i __A) { - // CHECK-LABEL: @test_mm512_storeu_epi32 + // CHECK-LABEL: test_mm512_storeu_epi32 // CHECK: store <8 x i64> %{{.*}}, ptr %{{.*}}, align 1{{$}} return _mm512_storeu_epi32(__P, __A); } void test_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_storeu_epi32 + // CHECK-LABEL: test_mm512_mask_storeu_epi32 // CHECK: @llvm.masked.store.v16i32.p0(<16 x i32> %{{.*}}, ptr %{{.*}}, i32 1, <16 x i1> %{{.*}}) return _mm512_mask_storeu_epi32(__P, __U, __A); } void test_mm512_storeu_epi64(void *__P, __m512i __A) { - // CHECK-LABEL: @test_mm512_storeu_epi64 + // CHECK-LABEL: test_mm512_storeu_epi64 // CHECK: store <8 x i64> %{{.*}}, ptr %{{.*}}, align 1{{$}} return _mm512_storeu_epi64(__P, __A); } void test_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_storeu_epi64 + // CHECK-LABEL: test_mm512_mask_storeu_epi64 // CHECK: @llvm.masked.store.v8i64.p0(<8 x i64> %{{.*}}, ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}) return _mm512_mask_storeu_epi64(__P, __U, __A); } __m512i test_mm512_loadu_si512 (void *__P) { - // CHECK-LABEL: @test_mm512_loadu_si512 + // CHECK-LABEL: test_mm512_loadu_si512 // CHECK: load <8 x i64>, ptr %{{.*}}, align 1{{$}} return _mm512_loadu_si512 ( __P); } __m512i test_mm512_loadu_epi32 (void 
*__P) { - // CHECK-LABEL: @test_mm512_loadu_epi32 + // CHECK-LABEL: test_mm512_loadu_epi32 // CHECK: load <8 x i64>, ptr %{{.*}}, align 1{{$}} return _mm512_loadu_epi32 (__P); } __m512i test_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void *__P) { - // CHECK-LABEL: @test_mm512_mask_loadu_epi32 + // CHECK-LABEL: test_mm512_mask_loadu_epi32 // CHECK: @llvm.masked.load.v16i32.p0(ptr %{{.*}}, i32 1, <16 x i1> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_mask_loadu_epi32 (__W,__U, __P); } __m512i test_mm512_maskz_loadu_epi32 (__mmask16 __U, void *__P) { - // CHECK-LABEL: @test_mm512_maskz_loadu_epi32 + // CHECK-LABEL: test_mm512_maskz_loadu_epi32 // CHECK: @llvm.masked.load.v16i32.p0(ptr %{{.*}}, i32 1, <16 x i1> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_maskz_loadu_epi32 (__U, __P); } __m512i test_mm512_loadu_epi64 (void *__P) { - // CHECK-LABEL: @test_mm512_loadu_epi64 + // CHECK-LABEL: test_mm512_loadu_epi64 // CHECK: load <8 x i64>, ptr %{{.*}}, align 1{{$}} return _mm512_loadu_epi64 (__P); } __m512i test_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void *__P) { - // CHECK-LABEL: @test_mm512_mask_loadu_epi64 + // CHECK-LABEL: test_mm512_mask_loadu_epi64 // CHECK: @llvm.masked.load.v8i64.p0(ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_mask_loadu_epi64 (__W,__U, __P); } __m512i test_mm512_maskz_loadu_epi64 (__mmask16 __U, void *__P) { - // CHECK-LABEL: @test_mm512_maskz_loadu_epi64 + // CHECK-LABEL: test_mm512_maskz_loadu_epi64 // CHECK: @llvm.masked.load.v8i64.p0(ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_maskz_loadu_epi64 (__U, __P); } __m512 test_mm512_loadu_ps(void *p) { - // CHECK-LABEL: @test_mm512_loadu_ps + // CHECK-LABEL: test_mm512_loadu_ps // CHECK: load <16 x float>, ptr {{.*}}, align 1{{$}} return _mm512_loadu_ps(p); } __m512 test_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void *__P) { - // CHECK-LABEL: @test_mm512_mask_loadu_ps + // CHECK-LABEL: test_mm512_mask_loadu_ps // CHECK: 
@llvm.masked.load.v16f32.p0(ptr %{{.*}}, i32 1, <16 x i1> %{{.*}}, <16 x float> %{{.*}}) return _mm512_mask_loadu_ps (__W,__U, __P); } __m512d test_mm512_loadu_pd(void *p) { - // CHECK-LABEL: @test_mm512_loadu_pd + // CHECK-LABEL: test_mm512_loadu_pd // CHECK: load <8 x double>, ptr {{.*}}, align 1{{$}} return _mm512_loadu_pd(p); } __m512d test_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void *__P) { - // CHECK-LABEL: @test_mm512_mask_loadu_pd + // CHECK-LABEL: test_mm512_mask_loadu_pd // CHECK: @llvm.masked.load.v8f64.p0(ptr %{{.*}}, i32 1, <8 x i1> %{{.*}}, <8 x double> %{{.*}}) return _mm512_mask_loadu_pd (__W,__U, __P); } __m512i test_mm512_load_si512 (void *__P) { - // CHECK-LABEL: @test_mm512_load_si512 + // CHECK-LABEL: test_mm512_load_si512 // CHECK: [[LI512_1:%.+]] = load ptr, ptr %__P.addr.i, align 8{{$}} // CHECK: load <8 x i64>, ptr [[LI512_1]], align 64{{$}} return _mm512_load_si512 ( __P); @@ -363,7 +365,7 @@ __m512i test_mm512_load_si512 (void *__P) __m512i test_mm512_load_epi32 (void *__P) { - // CHECK-LABEL: @test_mm512_load_epi32 + // CHECK-LABEL: test_mm512_load_epi32 // CHECK: [[LI32_1:%.+]] = load ptr, ptr %__P.addr.i, align 8{{$}} // CHECK: load <8 x i64>, ptr [[LI32_1]], align 64{{$}} return _mm512_load_epi32 ( __P); @@ -371,7 +373,7 @@ __m512i test_mm512_load_epi32 (void *__P) __m512i test_mm512_load_epi64 (void *__P) { - // CHECK-LABEL: @test_mm512_load_epi64 + // CHECK-LABEL: test_mm512_load_epi64 // CHECK: [[LI64_1:%.+]] = load ptr, ptr %__P.addr.i, align 8{{$}} // CHECK: load <8 x i64>, ptr [[LI64_1]], align 64{{$}} return _mm512_load_epi64 ( __P); @@ -379,49 +381,49 @@ __m512i test_mm512_load_epi64 (void *__P) __m512 test_mm512_load_ps(void *p) { - // CHECK-LABEL: @test_mm512_load_ps + // CHECK-LABEL: test_mm512_load_ps // CHECK: load <16 x float>, ptr %{{.*}}, align 64{{$}} return _mm512_load_ps(p); } __m512 test_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void *__P) { - // CHECK-LABEL: @test_mm512_mask_load_ps + // CHECK-LABEL: 
test_mm512_mask_load_ps // CHECK: @llvm.masked.load.v16f32.p0(ptr %{{.*}}, i32 64, <16 x i1> %{{.*}}, <16 x float> %{{.*}}) return _mm512_mask_load_ps (__W,__U, __P); } __m512 test_mm512_maskz_load_ps(__mmask16 __U, void *__P) { - // CHECK-LABEL: @test_mm512_maskz_load_ps + // CHECK-LABEL: test_mm512_maskz_load_ps // CHECK: @llvm.masked.load.v16f32.p0(ptr %{{.*}}, i32 64, <16 x i1> %{{.*}}, <16 x float> %{{.*}}) return _mm512_maskz_load_ps(__U, __P); } __m512d test_mm512_load_pd(void *p) { - // CHECK-LABEL: @test_mm512_load_pd + // CHECK-LABEL: test_mm512_load_pd // CHECK: load <8 x double>, ptr %{{.*}}, align 64{{$}} return _mm512_load_pd(p); } __m512d test_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void *__P) { - // CHECK-LABEL: @test_mm512_mask_load_pd + // CHECK-LABEL: test_mm512_mask_load_pd // CHECK: @llvm.masked.load.v8f64.p0(ptr %{{.*}}, i32 64, <8 x i1> %{{.*}}, <8 x double> %{{.*}}) return _mm512_mask_load_pd (__W,__U, __P); } __m512d test_mm512_maskz_load_pd(__mmask8 __U, void *__P) { - // CHECK-LABEL: @test_mm512_maskz_load_pd + // CHECK-LABEL: test_mm512_maskz_load_pd // CHECK: @llvm.masked.load.v8f64.p0(ptr %{{.*}}, i32 64, <8 x i1> %{{.*}}, <8 x double> %{{.*}}) return _mm512_maskz_load_pd(__U, __P); } __m512d test_mm512_set1_pd(double d) { - // CHECK-LABEL: @test_mm512_set1_pd + // CHECK-LABEL: test_mm512_set1_pd // CHECK: insertelement <8 x double> {{.*}}, i32 0 // CHECK: insertelement <8 x double> {{.*}}, i32 1 // CHECK: insertelement <8 x double> {{.*}}, i32 2 @@ -435,7 +437,7 @@ __m512d test_mm512_set1_pd(double d) __mmask16 test_mm512_knot(__mmask16 a) { - // CHECK-LABEL: @test_mm512_knot + // CHECK-LABEL: test_mm512_knot // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], // CHECK: bitcast <16 x i1> [[NOT]] to i16 @@ -444,14 +446,14 @@ __mmask16 test_mm512_knot(__mmask16 a) __m512i test_mm512_alignr_epi32(__m512i a, __m512i b) { - // CHECK-LABEL: @test_mm512_alignr_epi32 + // CHECK-LABEL: 
test_mm512_alignr_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> return _mm512_alignr_epi32(a, b, 2); } __m512i test_mm512_mask_alignr_epi32(__m512i w, __mmask16 u, __m512i a, __m512i b) { - // CHECK-LABEL: @test_mm512_mask_alignr_epi32 + // CHECK-LABEL: test_mm512_mask_alignr_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> {{.*}} return _mm512_mask_alignr_epi32(w, u, a, b, 2); @@ -459,7 +461,7 @@ __m512i test_mm512_mask_alignr_epi32(__m512i w, __mmask16 u, __m512i a, __m512i __m512i test_mm512_maskz_alignr_epi32( __mmask16 u, __m512i a, __m512i b) { - // CHECK-LABEL: @test_mm512_maskz_alignr_epi32 + // CHECK-LABEL: test_mm512_maskz_alignr_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> {{.*}} return _mm512_maskz_alignr_epi32(u, a, b, 2); @@ -467,14 +469,14 @@ __m512i test_mm512_maskz_alignr_epi32( __mmask16 u, __m512i a, __m512i b) __m512i test_mm512_alignr_epi64(__m512i a, __m512i b) { - // CHECK-LABEL: @test_mm512_alignr_epi64 + // CHECK-LABEL: test_mm512_alignr_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> return _mm512_alignr_epi64(a, b, 2); } __m512i test_mm512_mask_alignr_epi64(__m512i w, __mmask8 u, __m512i a, __m512i b) { - // CHECK-LABEL: @test_mm512_mask_alignr_epi64 + // CHECK-LABEL: test_mm512_mask_alignr_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> {{.*}} return _mm512_mask_alignr_epi64(w, u, a, b, 2); @@ -482,47 +484,47 @@ __m512i test_mm512_mask_alignr_epi64(__m512i w, __mmask8 u, __m512i a, __m512i b __m512i test_mm512_maskz_alignr_epi64( __mmask8 u, __m512i a, __m512i b) { - // CHECK-LABEL: @test_mm512_maskz_alignr_epi64 + // CHECK-LABEL: test_mm512_maskz_alignr_epi64 // CHECK: shufflevector 
<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> {{.*}} return _mm512_maskz_alignr_epi64(u, a, b, 2); } __m512d test_mm512_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmadd_round_pd + // CHECK-LABEL: test_mm512_fmadd_round_pd // CHECK: @llvm.x86.avx512.vfmadd.pd.512 return _mm512_fmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_fmadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmadd_round_pd + // CHECK-LABEL: test_mm512_mask_fmadd_round_pd // CHECK: @llvm.x86.avx512.vfmadd.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_fmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask3_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmadd_round_pd + // CHECK-LABEL: test_mm512_mask3_fmadd_round_pd // CHECK: @llvm.x86.avx512.vfmadd.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask3_fmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_fmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmadd_round_pd + // CHECK-LABEL: test_mm512_maskz_fmadd_round_pd // CHECK: @llvm.x86.avx512.vfmadd.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer return _mm512_maskz_fmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmsub_round_pd + // CHECK-LABEL: test_mm512_fmsub_round_pd // CHECK: 
fneg <8 x double> // CHECK: @llvm.x86.avx512.vfmadd.pd.512 return _mm512_fmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_fmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmsub_round_pd + // CHECK-LABEL: test_mm512_mask_fmsub_round_pd // CHECK: fneg <8 x double> // CHECK: @llvm.x86.avx512.vfmadd.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> @@ -530,7 +532,7 @@ __m512d test_mm512_mask_fmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, _ return _mm512_mask_fmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_fmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsub_round_pd + // CHECK-LABEL: test_mm512_maskz_fmsub_round_pd // CHECK: fneg <8 x double> // CHECK: @llvm.x86.avx512.vfmadd.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> @@ -538,13 +540,13 @@ __m512d test_mm512_maskz_fmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, return _mm512_maskz_fmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_fnmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fnmadd_round_pd + // CHECK-LABEL: test_mm512_fnmadd_round_pd // CHECK: fneg <8 x double> // CHECK: @llvm.x86.avx512.vfmadd.pd.512 return _mm512_fnmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask3_fnmadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmadd_round_pd + // CHECK-LABEL: test_mm512_mask3_fnmadd_round_pd // CHECK: fneg <8 x double> // CHECK: @llvm.x86.avx512.vfmadd.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> @@ -552,7 +554,7 @@ __m512d test_mm512_mask3_fnmadd_round_pd(__m512d __A, __m512d __B, __m512d __C, return _mm512_mask3_fnmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d 
test_mm512_maskz_fnmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmadd_round_pd + // CHECK-LABEL: test_mm512_maskz_fnmadd_round_pd // CHECK: fneg <8 x double> // CHECK: @llvm.x86.avx512.vfmadd.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> @@ -560,14 +562,14 @@ __m512d test_mm512_maskz_fnmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, return _mm512_maskz_fnmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fnmsub_round_pd + // CHECK-LABEL: test_mm512_fnmsub_round_pd // CHECK: fneg <8 x double> // CHECK: fneg <8 x double> // CHECK: @llvm.x86.avx512.vfmadd.pd.512 return _mm512_fnmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_fnmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmsub_round_pd + // CHECK-LABEL: test_mm512_maskz_fnmsub_round_pd // CHECK: fneg <8 x double> // CHECK: fneg <8 x double> // CHECK: @llvm.x86.avx512.vfmadd.pd.512 @@ -576,125 +578,125 @@ __m512d test_mm512_maskz_fnmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, return _mm512_maskz_fnmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmadd_pd - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK-LABEL: test_mm512_fmadd_pd + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) return _mm512_fmadd_pd(__A, __B, __C); } __m512d test_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmadd_pd - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x 
double> %{{.*}}) + // CHECK-LABEL: test_mm512_mask_fmadd_pd + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_fmadd_pd(__A, __U, __B, __C); } __m512d test_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmadd_pd - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK-LABEL: test_mm512_mask3_fmadd_pd + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask3_fmadd_pd(__A, __B, __C, __U); } __m512d test_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmadd_pd - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK-LABEL: test_mm512_maskz_fmadd_pd + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer return _mm512_maskz_fmadd_pd(__U, __A, __B, __C); } __m512d test_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmsub_pd + // CHECK-LABEL: test_mm512_fmsub_pd // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) return _mm512_fmsub_pd(__A, __B, __C); } __m512d 
test_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmsub_pd + // CHECK-LABEL: test_mm512_mask_fmsub_pd // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_fmsub_pd(__A, __U, __B, __C); } __m512d test_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsub_pd + // CHECK-LABEL: test_mm512_maskz_fmsub_pd // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer return _mm512_maskz_fmsub_pd(__U, __A, __B, __C); } __m512d test_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fnmadd_pd + // CHECK-LABEL: test_mm512_fnmadd_pd // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) return _mm512_fnmadd_pd(__A, __B, __C); } __m512d test_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmadd_pd + // CHECK-LABEL: test_mm512_mask3_fnmadd_pd // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x 
double> %{{.*}}) + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask3_fnmadd_pd(__A, __B, __C, __U); } __m512d test_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmadd_pd + // CHECK-LABEL: test_mm512_maskz_fnmadd_pd // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer return _mm512_maskz_fnmadd_pd(__U, __A, __B, __C); } __m512d test_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fnmsub_pd + // CHECK-LABEL: test_mm512_fnmsub_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) return _mm512_fnmsub_pd(__A, __B, __C); } __m512d test_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmsub_pd + // CHECK-LABEL: test_mm512_maskz_fnmsub_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> 
%{{.*}}, <8 x double> zeroinitializer return _mm512_maskz_fnmsub_pd(__U, __A, __B, __C); } __m512 test_mm512_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmadd_round_ps + // CHECK-LABEL: test_mm512_fmadd_round_ps // CHECK: @llvm.x86.avx512.vfmadd.ps.512 return _mm512_fmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_fmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmadd_round_ps + // CHECK-LABEL: test_mm512_mask_fmadd_round_ps // CHECK: @llvm.x86.avx512.vfmadd.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_fmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask3_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmadd_round_ps + // CHECK-LABEL: test_mm512_mask3_fmadd_round_ps // CHECK: @llvm.x86.avx512.vfmadd.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask3_fmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_fmadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmadd_round_ps + // CHECK-LABEL: test_mm512_maskz_fmadd_round_ps // CHECK: @llvm.x86.avx512.vfmadd.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer return _mm512_maskz_fmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmsub_round_ps + // CHECK-LABEL: test_mm512_fmsub_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.ps.512 return 
_mm512_fmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_fmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmsub_round_ps + // CHECK-LABEL: test_mm512_mask_fmsub_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> @@ -702,7 +704,7 @@ __m512 test_mm512_mask_fmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m return _mm512_mask_fmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_fmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsub_round_ps + // CHECK-LABEL: test_mm512_maskz_fmsub_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> @@ -710,13 +712,13 @@ __m512 test_mm512_maskz_fmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __ return _mm512_maskz_fmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_fnmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fnmadd_round_ps + // CHECK-LABEL: test_mm512_fnmadd_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.ps.512 return _mm512_fnmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask3_fnmadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmadd_round_ps + // CHECK-LABEL: test_mm512_mask3_fnmadd_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> @@ -724,7 +726,7 @@ __m512 test_mm512_mask3_fnmadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mm return _mm512_mask3_fnmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_fnmadd_round_ps(__mmask16 
__U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmadd_round_ps + // CHECK-LABEL: test_mm512_maskz_fnmadd_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> @@ -732,14 +734,14 @@ __m512 test_mm512_maskz_fnmadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, _ return _mm512_maskz_fnmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fnmsub_round_ps + // CHECK-LABEL: test_mm512_fnmsub_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.ps.512 return _mm512_fnmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_fnmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmsub_round_ps + // CHECK-LABEL: test_mm512_maskz_fnmsub_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.ps.512 @@ -748,123 +750,123 @@ __m512 test_mm512_maskz_fnmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, _ return _mm512_maskz_fnmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmadd_ps - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK-LABEL: test_mm512_fmadd_ps + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) return _mm512_fmadd_ps(__A, __B, __C); } __m512 test_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmadd_ps - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> 
%{{.*}}) + // CHECK-LABEL: test_mm512_mask_fmadd_ps + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) return _mm512_mask_fmadd_ps(__A, __U, __B, __C); } __m512 test_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmadd_ps - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK-LABEL: test_mm512_mask3_fmadd_ps + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask3_fmadd_ps(__A, __B, __C, __U); } __m512 test_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmadd_ps - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK-LABEL: test_mm512_maskz_fmadd_ps + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer return _mm512_maskz_fmadd_ps(__U, __A, __B, __C); } __m512 test_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmsub_ps + // CHECK-LABEL: test_mm512_fmsub_ps // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) return _mm512_fmsub_ps(__A, __B, __C); } __m512 test_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmsub_ps + // CHECK-LABEL: 
test_mm512_mask_fmsub_ps // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_fmsub_ps(__A, __U, __B, __C); } __m512 test_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsub_ps + // CHECK-LABEL: test_mm512_maskz_fmsub_ps // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer return _mm512_maskz_fmsub_ps(__U, __A, __B, __C); } __m512 test_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fnmadd_ps + // CHECK-LABEL: test_mm512_fnmadd_ps // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) return _mm512_fnmadd_ps(__A, __B, __C); } __m512 test_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmadd_ps + // CHECK-LABEL: test_mm512_mask3_fnmadd_ps // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // 
CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask3_fnmadd_ps(__A, __B, __C, __U); } __m512 test_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmadd_ps + // CHECK-LABEL: test_mm512_maskz_fnmadd_ps // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer return _mm512_maskz_fnmadd_ps(__U, __A, __B, __C); } __m512 test_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fnmsub_ps + // CHECK-LABEL: test_mm512_fnmsub_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) return _mm512_fnmsub_ps(__A, __B, __C); } __m512 test_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fnmsub_ps + // CHECK-LABEL: test_mm512_maskz_fnmsub_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer return _mm512_maskz_fnmsub_ps(__U, __A, __B, __C); } __m512d 
test_mm512_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmaddsub_round_pd + // CHECK-LABEL: test_mm512_fmaddsub_round_pd // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 return _mm512_fmaddsub_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_fmaddsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmaddsub_round_pd + // CHECK-LABEL: test_mm512_mask_fmaddsub_round_pd // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_fmaddsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask3_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmaddsub_round_pd + // CHECK-LABEL: test_mm512_mask3_fmaddsub_round_pd // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask3_fmaddsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_fmaddsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_pd + // CHECK-LABEL: test_mm512_maskz_fmaddsub_round_pd // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer return _mm512_maskz_fmaddsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmsubadd_round_pd + // CHECK-LABEL: test_mm512_fmsubadd_round_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 return _mm512_fmsubadd_round_pd(__A, 
__B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_fmsubadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmsubadd_round_pd + // CHECK-LABEL: test_mm512_mask_fmsubadd_round_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> @@ -872,7 +874,7 @@ __m512d test_mm512_mask_fmsubadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B return _mm512_mask_fmsubadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_fmsubadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsubadd_round_pd + // CHECK-LABEL: test_mm512_maskz_fmsubadd_round_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> @@ -880,91 +882,91 @@ __m512d test_mm512_maskz_fmsubadd_round_pd(__mmask8 __U, __m512d __A, __m512d __ return _mm512_maskz_fmsubadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmaddsub_pd + // CHECK-LABEL: test_mm512_fmaddsub_pd // CHECK-NOT: fneg - // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4) + // CHECK: call {{.*}}<8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4) return _mm512_fmaddsub_pd(__A, __B, __C); } __m512d test_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmaddsub_pd + // CHECK-LABEL: test_mm512_mask_fmaddsub_pd // CHECK-NOT: fneg - // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4) + // CHECK: call {{.*}}<8 x double> 
@llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_fmaddsub_pd(__A, __U, __B, __C); } __m512d test_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmaddsub_pd + // CHECK-LABEL: test_mm512_mask3_fmaddsub_pd // CHECK-NOT: fneg - // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4) + // CHECK: call {{.*}}<8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask3_fmaddsub_pd(__A, __B, __C, __U); } __m512d test_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmaddsub_pd + // CHECK-LABEL: test_mm512_maskz_fmaddsub_pd // CHECK-NOT: fneg - // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4) + // CHECK: call {{.*}}<8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer return _mm512_maskz_fmaddsub_pd(__U, __A, __B, __C); } __m512d test_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_fmsubadd_pd + // CHECK-LABEL: test_mm512_fmsubadd_pd // CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4) + // CHECK: call {{.*}}<8 x double> 
@llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4) return _mm512_fmsubadd_pd(__A, __B, __C); } __m512d test_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fmsubadd_pd + // CHECK-LABEL: test_mm512_mask_fmsubadd_pd // CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4) + // CHECK: call {{.*}}<8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_fmsubadd_pd(__A, __U, __B, __C); } __m512d test_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsubadd_pd + // CHECK-LABEL: test_mm512_maskz_fmsubadd_pd // CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4) + // CHECK: call {{.*}}<8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer return _mm512_maskz_fmsubadd_pd(__U, __A, __B, __C); } __m512 test_mm512_fmaddsub_round_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmaddsub_round_ps + // CHECK-LABEL: test_mm512_fmaddsub_round_ps // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 return _mm512_fmaddsub_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_fmaddsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmaddsub_round_ps + // 
CHECK-LABEL: test_mm512_mask_fmaddsub_round_ps // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_fmaddsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask3_fmaddsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmaddsub_round_ps + // CHECK-LABEL: test_mm512_mask3_fmaddsub_round_ps // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask3_fmaddsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_fmaddsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_ps + // CHECK-LABEL: test_mm512_maskz_fmaddsub_round_ps // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer return _mm512_maskz_fmaddsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmsubadd_round_ps + // CHECK-LABEL: test_mm512_fmsubadd_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 return _mm512_fmsubadd_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_fmsubadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmsubadd_round_ps + // CHECK-LABEL: test_mm512_mask_fmsubadd_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> @@ -972,7 +974,7 @@ __m512 test_mm512_mask_fmsubadd_round_ps(__m512 __A, 
__mmask16 __U, __m512 __B, return _mm512_mask_fmsubadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_fmsubadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsubadd_round_ps + // CHECK-LABEL: test_mm512_maskz_fmsubadd_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> @@ -980,59 +982,59 @@ __m512 test_mm512_maskz_fmsubadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, return _mm512_maskz_fmsubadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmaddsub_ps + // CHECK-LABEL: test_mm512_fmaddsub_ps // CHECK-NOT: fneg - // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4) + // CHECK: call {{.*}}<16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4) return _mm512_fmaddsub_ps(__A, __B, __C); } __m512 test_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmaddsub_ps + // CHECK-LABEL: test_mm512_mask_fmaddsub_ps // CHECK-NOT: fneg - // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4) + // CHECK: call {{.*}}<16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_fmaddsub_ps(__A, __U, __B, __C); } __m512 test_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmaddsub_ps + // CHECK-LABEL: test_mm512_mask3_fmaddsub_ps // 
CHECK-NOT: fneg - // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4) + // CHECK: call {{.*}}<16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask3_fmaddsub_ps(__A, __B, __C, __U); } __m512 test_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmaddsub_ps + // CHECK-LABEL: test_mm512_maskz_fmaddsub_ps // CHECK-NOT: fneg - // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4) + // CHECK: call {{.*}}<16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer return _mm512_maskz_fmaddsub_ps(__U, __A, __B, __C); } __m512 test_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_fmsubadd_ps + // CHECK-LABEL: test_mm512_fmsubadd_ps // CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4) + // CHECK: call {{.*}}<16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4) return _mm512_fmsubadd_ps(__A, __B, __C); } __m512 test_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fmsubadd_ps + // CHECK-LABEL: test_mm512_mask_fmsubadd_ps // CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> 
%{{.*}}, <16 x float> [[NEG]], i32 4) + // CHECK: call {{.*}}<16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_fmsubadd_ps(__A, __U, __B, __C); } __m512 test_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_maskz_fmsubadd_ps + // CHECK-LABEL: test_mm512_maskz_fmsubadd_ps // CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4) + // CHECK: call {{.*}}<16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer return _mm512_maskz_fmsubadd_ps(__U, __A, __B, __C); } __m512d test_mm512_mask3_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsub_round_pd + // CHECK-LABEL: test_mm512_mask3_fmsub_round_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> @@ -1040,15 +1042,15 @@ __m512d test_mm512_mask3_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, _ return _mm512_mask3_fmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsub_pd + // CHECK-LABEL: test_mm512_mask3_fmsub_pd // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x 
double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask3_fmsub_pd(__A, __B, __C, __U); } __m512 test_mm512_mask3_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsub_round_ps + // CHECK-LABEL: test_mm512_mask3_fmsub_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> @@ -1056,15 +1058,15 @@ __m512 test_mm512_mask3_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mma return _mm512_mask3_fmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsub_ps + // CHECK-LABEL: test_mm512_mask3_fmsub_ps // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask3_fmsub_ps(__A, __B, __C, __U); } __m512d test_mm512_mask3_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsubadd_round_pd + // CHECK-LABEL: test_mm512_mask3_fmsubadd_round_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: @llvm.x86.avx512.vfmaddsub.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> @@ -1072,15 +1074,15 @@ __m512d test_mm512_mask3_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C return _mm512_mask3_fmsubadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsubadd_pd + // 
CHECK-LABEL: test_mm512_mask3_fmsubadd_pd // CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4) + // CHECK: call {{.*}}<8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask3_fmsubadd_pd(__A, __B, __C, __U); } __m512 test_mm512_mask3_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsubadd_round_ps + // CHECK-LABEL: test_mm512_mask3_fmsubadd_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmaddsub.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> @@ -1088,15 +1090,15 @@ __m512 test_mm512_mask3_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __ return _mm512_mask3_fmsubadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fmsubadd_ps + // CHECK-LABEL: test_mm512_mask3_fmsubadd_ps // CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4) + // CHECK: call {{.*}}<16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask3_fmsubadd_ps(__A, __B, __C, __U); } __m512d test_mm512_mask_fnmadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fnmadd_round_pd + // CHECK-LABEL: test_mm512_mask_fnmadd_round_pd // CHECK: fneg <8 x double> // 
CHECK: @llvm.x86.avx512.vfmadd.pd.512 // CHECK: bitcast i8 %{{.*}} to <8 x i1> @@ -1104,15 +1106,15 @@ __m512d test_mm512_mask_fnmadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, return _mm512_mask_fnmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fnmadd_pd + // CHECK-LABEL: test_mm512_mask_fnmadd_pd // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_fnmadd_pd(__A, __U, __B, __C); } __m512 test_mm512_mask_fnmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fnmadd_round_ps + // CHECK-LABEL: test_mm512_mask_fnmadd_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.ps.512 // CHECK: bitcast i16 %{{.*}} to <16 x i1> @@ -1120,15 +1122,15 @@ __m512 test_mm512_mask_fnmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __ return _mm512_mask_fnmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fnmadd_ps + // CHECK-LABEL: test_mm512_mask_fnmadd_ps // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return 
_mm512_mask_fnmadd_ps(__A, __U, __B, __C); } __m512d test_mm512_mask_fnmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fnmsub_round_pd + // CHECK-LABEL: test_mm512_mask_fnmsub_round_pd // CHECK: fneg <8 x double> // CHECK: fneg <8 x double> // CHECK: @llvm.x86.avx512.vfmadd.pd.512 @@ -1137,7 +1139,7 @@ __m512d test_mm512_mask_fnmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, return _mm512_mask_fnmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask3_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmsub_round_pd + // CHECK-LABEL: test_mm512_mask3_fnmsub_round_pd // CHECK: fneg <8 x double> // CHECK: fneg <8 x double> // CHECK: @llvm.x86.avx512.vfmadd.pd.512 @@ -1146,25 +1148,25 @@ __m512d test_mm512_mask3_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, return _mm512_mask3_fnmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { - // CHECK-LABEL: @test_mm512_mask_fnmsub_pd + // CHECK-LABEL: test_mm512_mask_fnmsub_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_fnmsub_pd(__A, __U, __B, __C); } __m512d test_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmsub_pd + // CHECK-LABEL: test_mm512_mask3_fnmsub_pd // CHECK: fneg <8 x double> %{{.*}} // CHECK: fneg <8 x double> %{{.*}} - // CHECK: call <8 x double> @llvm.fma.v8f64(<8 x double> 
%{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) + // CHECK: call {{.*}}<8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask3_fnmsub_pd(__A, __B, __C, __U); } __m512 test_mm512_mask_fnmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fnmsub_round_ps + // CHECK-LABEL: test_mm512_mask_fnmsub_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.ps.512 @@ -1173,7 +1175,7 @@ __m512 test_mm512_mask_fnmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __ return _mm512_mask_fnmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask3_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmsub_round_ps + // CHECK-LABEL: test_mm512_mask3_fnmsub_round_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: fneg <16 x float> %{{.*}} // CHECK: @llvm.x86.avx512.vfmadd.ps.512 @@ -1182,119 +1184,119 @@ __m512 test_mm512_mask3_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mm return _mm512_mask3_fnmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { - // CHECK-LABEL: @test_mm512_mask_fnmsub_ps + // CHECK-LABEL: test_mm512_mask_fnmsub_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_fnmsub_ps(__A, 
__U, __B, __C); } __m512 test_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - // CHECK-LABEL: @test_mm512_mask3_fnmsub_ps + // CHECK-LABEL: test_mm512_mask3_fnmsub_ps // CHECK: fneg <16 x float> %{{.*}} // CHECK: fneg <16 x float> %{{.*}} - // CHECK: call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) + // CHECK: call {{.*}}<16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask3_fnmsub_ps(__A, __B, __C, __U); } __mmask16 test_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpeq_epi32_mask + // CHECK-LABEL: test_mm512_cmpeq_epi32_mask // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmpeq_epi32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpeq_epi32_mask + // CHECK-LABEL: test_mm512_mask_cmpeq_epi32_mask // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmpeq_epi32_mask(__u, __a, __b); } __mmask8 test_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpeq_epi64_mask + // CHECK-LABEL: test_mm512_mask_cmpeq_epi64_mask // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmpeq_epi64_mask(__u, __a, __b); } __mmask8 test_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpeq_epi64_mask + // CHECK-LABEL: test_mm512_cmpeq_epi64_mask // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmpeq_epi64_mask(__a, __b); } __mmask16 test_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpgt_epi32_mask + // CHECK-LABEL: 
test_mm512_cmpgt_epi32_mask // CHECK: icmp sgt <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmpgt_epi32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpgt_epi32_mask + // CHECK-LABEL: test_mm512_mask_cmpgt_epi32_mask // CHECK: icmp sgt <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmpgt_epi32_mask(__u, __a, __b); } __mmask8 test_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpgt_epi64_mask + // CHECK-LABEL: test_mm512_mask_cmpgt_epi64_mask // CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmpgt_epi64_mask(__u, __a, __b); } __mmask8 test_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpgt_epi64_mask + // CHECK-LABEL: test_mm512_cmpgt_epi64_mask // CHECK: icmp sgt <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmpgt_epi64_mask(__a, __b); } __m512d test_mm512_unpackhi_pd(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_unpackhi_pd + // CHECK-LABEL: test_mm512_unpackhi_pd // CHECK: shufflevector <8 x double> {{.*}} return _mm512_unpackhi_pd(a, b); } __m512d test_mm512_unpacklo_pd(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_unpacklo_pd + // CHECK-LABEL: test_mm512_unpacklo_pd // CHECK: shufflevector <8 x double> {{.*}} return _mm512_unpacklo_pd(a, b); } __m512 test_mm512_unpackhi_ps(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_unpackhi_ps + // CHECK-LABEL: test_mm512_unpackhi_ps // CHECK: shufflevector <16 x float> {{.*}} return _mm512_unpackhi_ps(a, b); } __m512 test_mm512_unpacklo_ps(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_unpacklo_ps + // CHECK-LABEL: test_mm512_unpacklo_ps // CHECK: shufflevector <16 x float> {{.*}} return _mm512_unpacklo_ps(a, b); } __mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) { - // 
CHECK-LABEL: @test_mm512_cmp_round_ps_mask + // CHECK-LABEL: test_mm512_cmp_round_ps_mask // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}} return _mm512_cmp_round_ps_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC); } __mmask16 test_mm512_mask_cmp_round_ps_mask(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmp_round_ps_mask + // CHECK-LABEL: test_mm512_mask_cmp_round_ps_mask // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> [[CMP]], {{.*}} return _mm512_mask_cmp_round_ps_mask(m, a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC); } __mmask16 test_mm512_cmp_ps_mask_eq_oq(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmp_ps_mask_eq_oq + // CHECK-LABEL: test_mm512_cmp_ps_mask_eq_oq // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}} return _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ); } @@ -1486,7 +1488,7 @@ __mmask16 test_mm512_cmp_ps_mask_true_us(__m512 a, __m512 b) { } __mmask16 test_mm512_mask_cmp_ps_mask_eq_oq(__mmask16 m, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmp_ps_mask_eq_oq + // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_eq_oq // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> [[CMP]], {{.*}} return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ); @@ -1710,20 +1712,20 @@ __mmask16 test_mm512_mask_cmp_ps_mask_true_us(__mmask16 m, __m512 a, __m512 b) { } __mmask8 test_mm512_cmp_round_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmp_round_pd_mask + // CHECK-LABEL: test_mm512_cmp_round_pd_mask // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}} return _mm512_cmp_round_pd_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC); } __mmask8 test_mm512_mask_cmp_round_pd_mask(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmp_round_pd_mask + // CHECK-LABEL: test_mm512_mask_cmp_round_pd_mask // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmp_round_pd_mask(m, a, b, _CMP_EQ_OQ, 
_MM_FROUND_NO_EXC); } __mmask8 test_mm512_cmp_pd_mask_eq_oq(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmp_pd_mask_eq_oq + // CHECK-LABEL: test_mm512_cmp_pd_mask_eq_oq // CHECK: fcmp oeq <8 x double> %{{.*}}, %{{.*}} return _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ); } @@ -1915,7 +1917,7 @@ __mmask8 test_mm512_cmp_pd_mask_true_us(__m512d a, __m512d b) { } __mmask8 test_mm512_mask_cmp_pd_mask_eq_oq(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmp_pd_mask_eq_oq + // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_eq_oq // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ); @@ -2139,215 +2141,215 @@ __mmask8 test_mm512_mask_cmp_pd_mask_true_us(__mmask8 m, __m512d a, __m512d b) { } __mmask8 test_mm512_mask_cmp_pd_mask(__mmask8 m, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmp_pd_mask + // CHECK-LABEL: test_mm512_mask_cmp_pd_mask // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmp_pd_mask(m, a, b, 0); } __mmask8 test_mm512_cmpeq_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmpeq_pd_mask + // CHECK-LABEL: test_mm512_cmpeq_pd_mask // CHECK: fcmp oeq <8 x double> %{{.*}}, %{{.*}} return _mm512_cmpeq_pd_mask(a, b); } __mmask16 test_mm512_cmpeq_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmpeq_ps_mask + // CHECK-LABEL: test_mm512_cmpeq_ps_mask // CHECK: fcmp oeq <16 x float> %{{.*}}, %{{.*}} return _mm512_cmpeq_ps_mask(a, b); } __mmask8 test_mm512_mask_cmpeq_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmpeq_pd_mask + // CHECK-LABEL: test_mm512_mask_cmpeq_pd_mask // CHECK: [[CMP:%.*]] = fcmp oeq <8 x double> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmpeq_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmpeq_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: 
@test_mm512_mask_cmpeq_ps_mask + // CHECK-LABEL: test_mm512_mask_cmpeq_ps_mask // CHECK: [[CMP:%.*]] = fcmp oeq <16 x float> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> [[CMP]], {{.*}} return _mm512_mask_cmpeq_ps_mask(k, a, b); } __mmask8 test_mm512_cmple_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmple_pd_mask + // CHECK-LABEL: test_mm512_cmple_pd_mask // CHECK: fcmp ole <8 x double> %{{.*}}, %{{.*}} return _mm512_cmple_pd_mask(a, b); } __mmask16 test_mm512_cmple_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmple_ps_mask + // CHECK-LABEL: test_mm512_cmple_ps_mask // CHECK: fcmp ole <16 x float> %{{.*}}, %{{.*}} return _mm512_cmple_ps_mask(a, b); } __mmask8 test_mm512_mask_cmple_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmple_pd_mask + // CHECK-LABEL: test_mm512_mask_cmple_pd_mask // CHECK: [[CMP:%.*]] = fcmp ole <8 x double> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmple_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmple_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmple_ps_mask + // CHECK-LABEL: test_mm512_mask_cmple_ps_mask // CHECK: [[CMP:%.*]] = fcmp ole <16 x float> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> [[CMP]], {{.*}} return _mm512_mask_cmple_ps_mask(k, a, b); } __mmask8 test_mm512_cmplt_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmplt_pd_mask + // CHECK-LABEL: test_mm512_cmplt_pd_mask // CHECK: fcmp olt <8 x double> %{{.*}}, %{{.*}} return _mm512_cmplt_pd_mask(a, b); } __mmask16 test_mm512_cmplt_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmplt_ps_mask + // CHECK-LABEL: test_mm512_cmplt_ps_mask // CHECK: fcmp olt <16 x float> %{{.*}}, %{{.*}} return _mm512_cmplt_ps_mask(a, b); } __mmask8 test_mm512_mask_cmplt_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmplt_pd_mask + // CHECK-LABEL: test_mm512_mask_cmplt_pd_mask // CHECK: [[CMP:%.*]] = fcmp olt <8 x 
double> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmplt_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmplt_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmplt_ps_mask + // CHECK-LABEL: test_mm512_mask_cmplt_ps_mask // CHECK: [[CMP:%.*]] = fcmp olt <16 x float> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> [[CMP]], {{.*}} return _mm512_mask_cmplt_ps_mask(k, a, b); } __mmask8 test_mm512_cmpneq_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmpneq_pd_mask + // CHECK-LABEL: test_mm512_cmpneq_pd_mask // CHECK: fcmp une <8 x double> %{{.*}}, %{{.*}} return _mm512_cmpneq_pd_mask(a, b); } __mmask16 test_mm512_cmpneq_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmpneq_ps_mask + // CHECK-LABEL: test_mm512_cmpneq_ps_mask // CHECK: fcmp une <16 x float> %{{.*}}, %{{.*}} return _mm512_cmpneq_ps_mask(a, b); } __mmask8 test_mm512_mask_cmpneq_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmpneq_pd_mask + // CHECK-LABEL: test_mm512_mask_cmpneq_pd_mask // CHECK: [[CMP:%.*]] = fcmp une <8 x double> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmpneq_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmpneq_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmpneq_ps_mask + // CHECK-LABEL: test_mm512_mask_cmpneq_ps_mask // CHECK: [[CMP:%.*]] = fcmp une <16 x float> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> [[CMP]], {{.*}} return _mm512_mask_cmpneq_ps_mask(k, a, b); } __mmask8 test_mm512_cmpnle_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmpnle_pd_mask + // CHECK-LABEL: test_mm512_cmpnle_pd_mask // CHECK: fcmp ugt <8 x double> %{{.*}}, %{{.*}} return _mm512_cmpnle_pd_mask(a, b); } __mmask16 test_mm512_cmpnle_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmpnle_ps_mask + // CHECK-LABEL: test_mm512_cmpnle_ps_mask // CHECK: fcmp ugt <16 x float> %{{.*}}, %{{.*}} return 
_mm512_cmpnle_ps_mask(a, b); } __mmask8 test_mm512_mask_cmpnle_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmpnle_pd_mask + // CHECK-LABEL: test_mm512_mask_cmpnle_pd_mask // CHECK: [[CMP:%.*]] = fcmp ugt <8 x double> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmpnle_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmpnle_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmpnle_ps_mask + // CHECK-LABEL: test_mm512_mask_cmpnle_ps_mask // CHECK: [[CMP:%.*]] = fcmp ugt <16 x float> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> [[CMP]], {{.*}} return _mm512_mask_cmpnle_ps_mask(k, a, b); } __mmask8 test_mm512_cmpnlt_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmpnlt_pd_mask + // CHECK-LABEL: test_mm512_cmpnlt_pd_mask // CHECK: fcmp uge <8 x double> %{{.*}}, %{{.*}} return _mm512_cmpnlt_pd_mask(a, b); } __mmask16 test_mm512_cmpnlt_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmpnlt_ps_mask + // CHECK-LABEL: test_mm512_cmpnlt_ps_mask // CHECK: fcmp uge <16 x float> %{{.*}}, %{{.*}} return _mm512_cmpnlt_ps_mask(a, b); } __mmask8 test_mm512_mask_cmpnlt_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmpnlt_pd_mask + // CHECK-LABEL: test_mm512_mask_cmpnlt_pd_mask // CHECK: [[CMP:%.*]] = fcmp uge <8 x double> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmpnlt_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmpnlt_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmpnlt_ps_mask + // CHECK-LABEL: test_mm512_mask_cmpnlt_ps_mask // CHECK: [[CMP:%.*]] = fcmp uge <16 x float> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> [[CMP]], {{.*}} return _mm512_mask_cmpnlt_ps_mask(k, a, b); } __mmask8 test_mm512_cmpord_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmpord_pd_mask + // CHECK-LABEL: test_mm512_cmpord_pd_mask // CHECK: fcmp ord <8 x double> %{{.*}}, %{{.*}} return 
_mm512_cmpord_pd_mask(a, b); } __mmask16 test_mm512_cmpord_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmpord_ps_mask + // CHECK-LABEL: test_mm512_cmpord_ps_mask // CHECK: fcmp ord <16 x float> %{{.*}}, %{{.*}} return _mm512_cmpord_ps_mask(a, b); } __mmask8 test_mm512_mask_cmpord_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmpord_pd_mask + // CHECK-LABEL: test_mm512_mask_cmpord_pd_mask // CHECK: [[CMP:%.*]] = fcmp ord <8 x double> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmpord_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmpord_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmpord_ps_mask + // CHECK-LABEL: test_mm512_mask_cmpord_ps_mask // CHECK: [[CMP:%.*]] = fcmp ord <16 x float> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> [[CMP]], {{.*}} return _mm512_mask_cmpord_ps_mask(k, a, b); } __mmask8 test_mm512_cmpunord_pd_mask(__m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_cmpunord_pd_mask + // CHECK-LABEL: test_mm512_cmpunord_pd_mask // CHECK: fcmp uno <8 x double> %{{.*}}, %{{.*}} return _mm512_cmpunord_pd_mask(a, b); } __mmask16 test_mm512_cmpunord_ps_mask(__m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_cmpunord_ps_mask + // CHECK-LABEL: test_mm512_cmpunord_ps_mask // CHECK: fcmp uno <16 x float> %{{.*}}, %{{.*}} return _mm512_cmpunord_ps_mask(a, b); } __mmask8 test_mm512_mask_cmpunord_pd_mask(__mmask8 k, __m512d a, __m512d b) { - // CHECK-LABEL: @test_mm512_mask_cmpunord_pd_mask + // CHECK-LABEL: test_mm512_mask_cmpunord_pd_mask // CHECK: [[CMP:%.*]] = fcmp uno <8 x double> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> [[CMP]], {{.*}} return _mm512_mask_cmpunord_pd_mask(k, a, b); } __mmask16 test_mm512_mask_cmpunord_ps_mask(__mmask16 k, __m512 a, __m512 b) { - // CHECK-LABEL: @test_mm512_mask_cmpunord_ps_mask + // CHECK-LABEL: test_mm512_mask_cmpunord_ps_mask // CHECK: [[CMP:%.*]] = fcmp uno <16 x float> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> 
[[CMP]], {{.*}} return _mm512_mask_cmpunord_ps_mask(k, a, b); @@ -2355,20 +2357,20 @@ __mmask16 test_mm512_mask_cmpunord_ps_mask(__mmask16 k, __m512 a, __m512 b) { __m256d test_mm512_extractf64x4_pd(__m512d a) { - // CHECK-LABEL: @test_mm512_extractf64x4_pd + // CHECK-LABEL: test_mm512_extractf64x4_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> return _mm512_extractf64x4_pd(a, 1); } __m256d test_mm512_mask_extractf64x4_pd(__m256d __W,__mmask8 __U,__m512d __A){ - // CHECK-LABEL:@test_mm512_mask_extractf64x4_pd + // CHECK-LABEL: test_mm512_mask_extractf64x4_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1); } __m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){ - // CHECK-LABEL:@test_mm512_maskz_extractf64x4_pd + // CHECK-LABEL: test_mm512_maskz_extractf64x4_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm512_maskz_extractf64x4_pd( __U, __A, 1); @@ -2376,339 +2378,339 @@ __m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){ __m128 test_mm512_extractf32x4_ps(__m512 a) { - // CHECK-LABEL: @test_mm512_extractf32x4_ps + // CHECK-LABEL: test_mm512_extractf32x4_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <4 x i32> return _mm512_extractf32x4_ps(a, 1); } __m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U,__m512 __A){ - // CHECK-LABEL:@test_mm512_mask_extractf32x4_ps + // CHECK-LABEL: test_mm512_mask_extractf32x4_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1); } __m128 test_mm512_maskz_extractf32x4_ps( __mmask8 __U,__m512 __A){ - // 
CHECK-LABEL:@test_mm512_maskz_extractf32x4_ps + // CHECK-LABEL: test_mm512_maskz_extractf32x4_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm512_maskz_extractf32x4_ps(__U, __A, 1); } __mmask16 test_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpeq_epu32_mask + // CHECK-LABEL: test_mm512_cmpeq_epu32_mask // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmpeq_epu32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpeq_epu32_mask + // CHECK-LABEL: test_mm512_mask_cmpeq_epu32_mask // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmpeq_epu32_mask(__u, __a, __b); } __mmask8 test_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpeq_epu64_mask + // CHECK-LABEL: test_mm512_cmpeq_epu64_mask // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmpeq_epu64_mask(__a, __b); } __mmask8 test_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpeq_epu64_mask + // CHECK-LABEL: test_mm512_mask_cmpeq_epu64_mask // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmpeq_epu64_mask(__u, __a, __b); } __mmask16 test_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpge_epi32_mask + // CHECK-LABEL: test_mm512_cmpge_epi32_mask // CHECK: icmp sge <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmpge_epi32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpge_epi32_mask + // CHECK-LABEL: test_mm512_mask_cmpge_epi32_mask // CHECK: icmp sge <16 x i32> %{{.*}}, %{{.*}} // 
CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmpge_epi32_mask(__u, __a, __b); } __mmask8 test_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpge_epi64_mask + // CHECK-LABEL: test_mm512_cmpge_epi64_mask // CHECK: icmp sge <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmpge_epi64_mask(__a, __b); } __mmask8 test_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpge_epi64_mask + // CHECK-LABEL: test_mm512_mask_cmpge_epi64_mask // CHECK: icmp sge <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmpge_epi64_mask(__u, __a, __b); } __mmask16 test_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpge_epu32_mask + // CHECK-LABEL: test_mm512_cmpge_epu32_mask // CHECK: icmp uge <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmpge_epu32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpge_epu32_mask + // CHECK-LABEL: test_mm512_mask_cmpge_epu32_mask // CHECK: icmp uge <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmpge_epu32_mask(__u, __a, __b); } __mmask8 test_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpge_epu64_mask + // CHECK-LABEL: test_mm512_cmpge_epu64_mask // CHECK: icmp uge <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmpge_epu64_mask(__a, __b); } __mmask8 test_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpge_epu64_mask + // CHECK-LABEL: test_mm512_mask_cmpge_epu64_mask // CHECK: icmp uge <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmpge_epu64_mask(__u, __a, __b); } __mmask16 test_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: 
@test_mm512_cmpgt_epu32_mask + // CHECK-LABEL: test_mm512_cmpgt_epu32_mask // CHECK: icmp ugt <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmpgt_epu32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpgt_epu32_mask + // CHECK-LABEL: test_mm512_mask_cmpgt_epu32_mask // CHECK: icmp ugt <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmpgt_epu32_mask(__u, __a, __b); } __mmask8 test_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpgt_epu64_mask + // CHECK-LABEL: test_mm512_cmpgt_epu64_mask // CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmpgt_epu64_mask(__a, __b); } __mmask8 test_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpgt_epu64_mask + // CHECK-LABEL: test_mm512_mask_cmpgt_epu64_mask // CHECK: icmp ugt <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmpgt_epu64_mask(__u, __a, __b); } __mmask16 test_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmple_epi32_mask + // CHECK-LABEL: test_mm512_cmple_epi32_mask // CHECK: icmp sle <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmple_epi32_mask(__a, __b); } __mmask16 test_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmple_epi32_mask + // CHECK-LABEL: test_mm512_mask_cmple_epi32_mask // CHECK: icmp sle <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmple_epi32_mask(__u, __a, __b); } __mmask8 test_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmple_epi64_mask + // CHECK-LABEL: test_mm512_cmple_epi64_mask // CHECK: icmp sle <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmple_epi64_mask(__a, __b); } __mmask8 
test_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmple_epi64_mask + // CHECK-LABEL: test_mm512_mask_cmple_epi64_mask // CHECK: icmp sle <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmple_epi64_mask(__u, __a, __b); } __mmask16 test_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmple_epu32_mask + // CHECK-LABEL: test_mm512_cmple_epu32_mask // CHECK: icmp ule <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmple_epu32_mask(__a, __b); } __mmask16 test_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmple_epu32_mask + // CHECK-LABEL: test_mm512_mask_cmple_epu32_mask // CHECK: icmp ule <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmple_epu32_mask(__u, __a, __b); } __mmask8 test_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmple_epu64_mask + // CHECK-LABEL: test_mm512_cmple_epu64_mask // CHECK: icmp ule <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmple_epu64_mask(__a, __b); } __mmask8 test_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmple_epu64_mask + // CHECK-LABEL: test_mm512_mask_cmple_epu64_mask // CHECK: icmp ule <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmple_epu64_mask(__u, __a, __b); } __mmask16 test_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmplt_epi32_mask + // CHECK-LABEL: test_mm512_cmplt_epi32_mask // CHECK: icmp slt <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmplt_epi32_mask(__a, __b); } __mmask16 test_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmplt_epi32_mask + // CHECK-LABEL: test_mm512_mask_cmplt_epi32_mask // CHECK: icmp slt <16 
x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmplt_epi32_mask(__u, __a, __b); } __mmask8 test_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmplt_epi64_mask + // CHECK-LABEL: test_mm512_cmplt_epi64_mask // CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmplt_epi64_mask(__a, __b); } __mmask8 test_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmplt_epi64_mask + // CHECK-LABEL: test_mm512_mask_cmplt_epi64_mask // CHECK: icmp slt <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmplt_epi64_mask(__u, __a, __b); } __mmask16 test_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmplt_epu32_mask + // CHECK-LABEL: test_mm512_cmplt_epu32_mask // CHECK: icmp ult <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmplt_epu32_mask(__a, __b); } __mmask16 test_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmplt_epu32_mask + // CHECK-LABEL: test_mm512_mask_cmplt_epu32_mask // CHECK: icmp ult <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmplt_epu32_mask(__u, __a, __b); } __mmask8 test_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmplt_epu64_mask + // CHECK-LABEL: test_mm512_cmplt_epu64_mask // CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmplt_epu64_mask(__a, __b); } __mmask8 test_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmplt_epu64_mask + // CHECK-LABEL: test_mm512_mask_cmplt_epu64_mask // CHECK: icmp ult <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmplt_epu64_mask(__u, __a, __b); } __mmask16 test_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) { - // 
CHECK-LABEL: @test_mm512_cmpneq_epi32_mask + // CHECK-LABEL: test_mm512_cmpneq_epi32_mask // CHECK: icmp ne <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmpneq_epi32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpneq_epi32_mask + // CHECK-LABEL: test_mm512_mask_cmpneq_epi32_mask // CHECK: icmp ne <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmpneq_epi32_mask(__u, __a, __b); } __mmask8 test_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpneq_epi64_mask + // CHECK-LABEL: test_mm512_cmpneq_epi64_mask // CHECK: icmp ne <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmpneq_epi64_mask(__a, __b); } __mmask8 test_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpneq_epi64_mask + // CHECK-LABEL: test_mm512_mask_cmpneq_epi64_mask // CHECK: icmp ne <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmpneq_epi64_mask(__u, __a, __b); } __mmask16 test_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpneq_epu32_mask + // CHECK-LABEL: test_mm512_cmpneq_epu32_mask // CHECK: icmp ne <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmpneq_epu32_mask(__a, __b); } __mmask16 test_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpneq_epu32_mask + // CHECK-LABEL: test_mm512_mask_cmpneq_epu32_mask // CHECK: icmp ne <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmpneq_epu32_mask(__u, __a, __b); } __mmask8 test_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmpneq_epu64_mask + // CHECK-LABEL: test_mm512_cmpneq_epu64_mask // CHECK: icmp ne <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmpneq_epu64_mask(__a, 
__b); } __mmask8 test_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmpneq_epu64_mask + // CHECK-LABEL: test_mm512_mask_cmpneq_epu64_mask // CHECK: icmp ne <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmpneq_epu64_mask(__u, __a, __b); } __mmask16 test_mm512_cmp_eq_epi32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmp_eq_epi32_mask + // CHECK-LABEL: test_mm512_cmp_eq_epi32_mask // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmp_epi32_mask(__a, __b, _MM_CMPINT_EQ); } __mmask16 test_mm512_mask_cmp_eq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmp_eq_epi32_mask + // CHECK-LABEL: test_mm512_mask_cmp_eq_epi32_mask // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmp_epi32_mask(__u, __a, __b, _MM_CMPINT_EQ); } __mmask8 test_mm512_cmp_eq_epi64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmp_eq_epi64_mask + // CHECK-LABEL: test_mm512_cmp_eq_epi64_mask // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmp_epi64_mask(__a, __b, _MM_CMPINT_EQ); } __mmask8 test_mm512_mask_cmp_eq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmp_eq_epi64_mask + // CHECK-LABEL: test_mm512_mask_cmp_eq_epi64_mask // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmp_epi64_mask(__u, __a, __b, _MM_CMPINT_EQ); } __mmask16 test_mm512_cmp_epu32_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmp_epu32_mask + // CHECK-LABEL: test_mm512_cmp_epu32_mask // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} return (__mmask16)_mm512_cmp_epu32_mask(__a, __b, 0); } __mmask16 test_mm512_mask_cmp_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmp_epu32_mask + // 
CHECK-LABEL: test_mm512_mask_cmp_epu32_mask // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return (__mmask16)_mm512_mask_cmp_epu32_mask(__u, __a, __b, 0); } __mmask8 test_mm512_cmp_epu64_mask(__m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_cmp_epu64_mask + // CHECK-LABEL: test_mm512_cmp_epu64_mask // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} return (__mmask8)_mm512_cmp_epu64_mask(__a, __b, 0); } __mmask8 test_mm512_mask_cmp_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_cmp_epu64_mask + // CHECK-LABEL: test_mm512_mask_cmp_epu64_mask // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} return (__mmask8)_mm512_mask_cmp_epu64_mask(__u, __a, __b, 0); } __m512i test_mm512_mask_and_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_and_epi32 + // CHECK-LABEL: test_mm512_mask_and_epi32 // CHECK: and <16 x i32> // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} @@ -2716,7 +2718,7 @@ __m512i test_mm512_mask_and_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m5 } __m512i test_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_maskz_and_epi32 + // CHECK-LABEL: test_mm512_maskz_and_epi32 // CHECK: and <16 x i32> // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} @@ -2724,7 +2726,7 @@ __m512i test_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) { } __m512i test_mm512_mask_and_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_and_epi64 + // CHECK-LABEL: test_mm512_mask_and_epi64 // CHECK: %[[AND_RES:.*]] = and <8 x i64> // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[AND_RES]], <8 x 
i64> %{{.*}} @@ -2732,7 +2734,7 @@ __m512i test_mm512_mask_and_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m51 } __m512i test_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_maskz_and_epi64 + // CHECK-LABEL: test_mm512_maskz_and_epi64 // CHECK: %[[AND_RES:.*]] = and <8 x i64> // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[AND_RES]], <8 x i64> %{{.*}} @@ -2740,7 +2742,7 @@ __m512i test_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) { } __m512i test_mm512_mask_or_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_or_epi32 + // CHECK-LABEL: test_mm512_mask_or_epi32 // CHECK: or <16 x i32> // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} @@ -2748,7 +2750,7 @@ __m512i test_mm512_mask_or_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m51 } __m512i test_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_maskz_or_epi32 + // CHECK-LABEL: test_mm512_maskz_or_epi32 // CHECK: or <16 x i32> // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} @@ -2756,7 +2758,7 @@ __m512i test_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) { } __m512i test_mm512_mask_or_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_or_epi64 + // CHECK-LABEL: test_mm512_mask_or_epi64 // CHECK: %[[OR_RES:.*]] = or <8 x i64> // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[OR_RES]], <8 x i64> %{{.*}} @@ -2764,7 +2766,7 @@ __m512i test_mm512_mask_or_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512 } __m512i test_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: 
@test_mm512_maskz_or_epi64 + // CHECK-LABEL: test_mm512_maskz_or_epi64 // CHECK: %[[OR_RES:.*]] = or <8 x i64> // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[OR_RES]], <8 x i64> %{{.*}} @@ -2772,7 +2774,7 @@ __m512i test_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) { } __m512i test_mm512_mask_xor_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_xor_epi32 + // CHECK-LABEL: test_mm512_mask_xor_epi32 // CHECK: xor <16 x i32> // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} @@ -2780,7 +2782,7 @@ __m512i test_mm512_mask_xor_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m5 } __m512i test_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_maskz_xor_epi32 + // CHECK-LABEL: test_mm512_maskz_xor_epi32 // CHECK: xor <16 x i32> // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} @@ -2788,7 +2790,7 @@ __m512i test_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) { } __m512i test_mm512_mask_xor_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_mask_xor_epi64 + // CHECK-LABEL: test_mm512_mask_xor_epi64 // CHECK: %[[XOR_RES:.*]] = xor <8 x i64> // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[XOR_RES]], <8 x i64> %{{.*}} @@ -2796,7 +2798,7 @@ __m512i test_mm512_mask_xor_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m51 } __m512i test_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_maskz_xor_epi64 + // CHECK-LABEL: test_mm512_maskz_xor_epi64 // CHECK: %[[XOR_RES:.*]] = xor <8 x i64> // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %[[MASK]], <8 x i64> 
%[[XOR_RES]], <8 x i64> %{{.*}} @@ -2804,43 +2806,43 @@ __m512i test_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) { } __m512i test_mm512_and_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_and_epi32 + // CHECK-LABEL: test_mm512_and_epi32 // CHECK: and <16 x i32> return _mm512_and_epi32(__a, __b); } __m512i test_mm512_and_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_and_epi64 + // CHECK-LABEL: test_mm512_and_epi64 // CHECK: and <8 x i64> return _mm512_and_epi64(__a, __b); } __m512i test_mm512_or_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_or_epi32 + // CHECK-LABEL: test_mm512_or_epi32 // CHECK: or <16 x i32> return _mm512_or_epi32(__a, __b); } __m512i test_mm512_or_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_or_epi64 + // CHECK-LABEL: test_mm512_or_epi64 // CHECK: or <8 x i64> return _mm512_or_epi64(__a, __b); } __m512i test_mm512_xor_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_xor_epi32 + // CHECK-LABEL: test_mm512_xor_epi32 // CHECK: xor <16 x i32> return _mm512_xor_epi32(__a, __b); } __m512i test_mm512_xor_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { - // CHECK-LABEL: @test_mm512_xor_epi64 + // CHECK-LABEL: test_mm512_xor_epi64 // CHECK: xor <8 x i64> return _mm512_xor_epi64(__a, __b); } __m512i test_mm512_maskz_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B){ - // CHECK-LABEL: @test_mm512_maskz_andnot_epi32 + // CHECK-LABEL: test_mm512_maskz_andnot_epi32 // CHECK: xor <16 x i32> %{{.*}}, // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} @@ -2849,7 +2851,7 @@ __m512i test_mm512_maskz_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B){ __m512i test_mm512_mask_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i 
__src) { - // CHECK-LABEL: @test_mm512_mask_andnot_epi32 + // CHECK-LABEL: test_mm512_mask_andnot_epi32 // CHECK: xor <16 x i32> %{{.*}}, // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} @@ -2858,7 +2860,7 @@ __m512i test_mm512_mask_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i test_mm512_andnot_si512(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_andnot_si512 + //CHECK-LABEL: test_mm512_andnot_si512 //CHECK: load {{.*}}%__A.addr.i, align 64 //CHECK: %not.i = xor{{.*}}, //CHECK: load {{.*}}%__B.addr.i, align 64 @@ -2868,14 +2870,14 @@ __m512i test_mm512_andnot_si512(__m512i __A, __m512i __B) } __m512i test_mm512_andnot_epi32(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_andnot_epi32 + // CHECK-LABEL: test_mm512_andnot_epi32 // CHECK: xor <16 x i32> %{{.*}}, // CHECK: and <16 x i32> %{{.*}}, %{{.*}} return _mm512_andnot_epi32(__A,__B); } __m512i test_mm512_maskz_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_andnot_epi64 + // CHECK-LABEL: test_mm512_maskz_andnot_epi64 // CHECK: xor <8 x i64> %{{.*}}, // CHECK: and <8 x i64> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} @@ -2884,7 +2886,7 @@ __m512i test_mm512_maskz_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { __m512i test_mm512_mask_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_andnot_epi64 + //CHECK-LABEL: test_mm512_mask_andnot_epi64 // CHECK: xor <8 x i64> %{{.*}}, // CHECK: and <8 x i64> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} @@ -2892,14 +2894,14 @@ __m512i test_mm512_mask_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B, } __m512i test_mm512_andnot_epi64(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_andnot_epi64 + //CHECK-LABEL: test_mm512_andnot_epi64 // CHECK: xor <8 x i64> %{{.*}}, // 
CHECK: and <8 x i64> %{{.*}}, %{{.*}} return _mm512_andnot_epi64(__A,__B); } __m512i test_mm512_maskz_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_sub_epi32 + //CHECK-LABEL: test_mm512_maskz_sub_epi32 //CHECK: sub <16 x i32> %{{.*}}, %{{.*}} //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_sub_epi32(__k,__A,__B); @@ -2907,20 +2909,20 @@ __m512i test_mm512_maskz_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_sub_epi32 + //CHECK-LABEL: test_mm512_mask_sub_epi32 //CHECK: sub <16 x i32> %{{.*}}, %{{.*}} //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_sub_epi32(__src,__k,__A,__B); } __m512i test_mm512_sub_epi32(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_sub_epi32 + //CHECK-LABEL: test_mm512_sub_epi32 //CHECK: sub <16 x i32> return _mm512_sub_epi32(__A,__B); } __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_sub_epi64 + //CHECK-LABEL: test_mm512_maskz_sub_epi64 //CHECK: sub <8 x i64> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_sub_epi64(__k,__A,__B); @@ -2928,20 +2930,20 @@ __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_sub_epi64 + //CHECK-LABEL: test_mm512_mask_sub_epi64 //CHECK: sub <8 x i64> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_sub_epi64(__src,__k,__A,__B); } __m512i test_mm512_sub_epi64(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_sub_epi64 + //CHECK-LABEL: test_mm512_sub_epi64 //CHECK: sub <8 x i64> return _mm512_sub_epi64(__A,__B); } 
__m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_add_epi32 + //CHECK-LABEL: test_mm512_maskz_add_epi32 //CHECK: add <16 x i32> %{{.*}}, %{{.*}} //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_add_epi32(__k,__A,__B); @@ -2949,20 +2951,20 @@ __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_add_epi32 + //CHECK-LABEL: test_mm512_mask_add_epi32 //CHECK: add <16 x i32> %{{.*}}, %{{.*}} //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_add_epi32(__src,__k,__A,__B); } __m512i test_mm512_add_epi32(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_add_epi32 + //CHECK-LABEL: test_mm512_add_epi32 //CHECK: add <16 x i32> return _mm512_add_epi32(__A,__B); } __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_add_epi64 + //CHECK-LABEL: test_mm512_maskz_add_epi64 //CHECK: add <8 x i64> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_add_epi64(__k,__A,__B); @@ -2970,20 +2972,20 @@ __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_add_epi64 + //CHECK-LABEL: test_mm512_mask_add_epi64 //CHECK: add <8 x i64> %{{.*}}, %{{.*}} //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_add_epi64(__src,__k,__A,__B); } __m512i test_mm512_add_epi64(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_add_epi64 + //CHECK-LABEL: test_mm512_add_epi64 //CHECK: add <8 x i64> return _mm512_add_epi64(__A,__B); } __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) { - //CHECK-LABEL: 
@test_mm512_mul_epi32 + //CHECK-LABEL: test_mm512_mul_epi32 //CHECK: shl <8 x i64> %{{.*}}, //CHECK: ashr <8 x i64> %{{.*}}, //CHECK: shl <8 x i64> %{{.*}}, @@ -2993,7 +2995,7 @@ __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) { } __m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_mul_epi32 + //CHECK-LABEL: test_mm512_maskz_mul_epi32 //CHECK: shl <8 x i64> %{{.*}}, //CHECK: ashr <8 x i64> %{{.*}}, //CHECK: shl <8 x i64> %{{.*}}, @@ -3004,7 +3006,7 @@ __m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) { } __m512i test_mm512_mask_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_mul_epi32 + //CHECK-LABEL: test_mm512_mask_mul_epi32 //CHECK: shl <8 x i64> %{{.*}}, //CHECK: ashr <8 x i64> %{{.*}}, //CHECK: shl <8 x i64> %{{.*}}, @@ -3015,7 +3017,7 @@ __m512i test_mm512_mask_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B, __m512 } __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_mul_epu32 + //CHECK-LABEL: test_mm512_mul_epu32 //CHECK: and <8 x i64> %{{.*}}, //CHECK: and <8 x i64> %{{.*}}, //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} @@ -3023,7 +3025,7 @@ __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) { } __m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_mul_epu32 + //CHECK-LABEL: test_mm512_maskz_mul_epu32 //CHECK: and <8 x i64> %{{.*}}, //CHECK: and <8 x i64> %{{.*}}, //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} @@ -3032,7 +3034,7 @@ __m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) { } __m512i test_mm512_mask_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_mul_epu32 + //CHECK-LABEL: test_mm512_mask_mul_epu32 //CHECK: and <8 x i64> %{{.*}}, //CHECK: and <8 x i64> %{{.*}}, //CHECK: mul <8 x i64> %{{.*}}, %{{.*}} @@ -3041,113 +3043,113 @@ __m512i 
test_mm512_mask_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B, __m512 } __m512i test_mm512_maskz_mullo_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_maskz_mullo_epi32 + //CHECK-LABEL: test_mm512_maskz_mullo_epi32 //CHECK: mul <16 x i32> %{{.*}}, %{{.*}} //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_mullo_epi32(__k,__A,__B); } __m512i test_mm512_mask_mullo_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { - //CHECK-LABEL: @test_mm512_mask_mullo_epi32 + //CHECK-LABEL: test_mm512_mask_mullo_epi32 //CHECK: mul <16 x i32> %{{.*}}, %{{.*}} //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_mullo_epi32(__src,__k,__A,__B); } __m512i test_mm512_mullo_epi32(__m512i __A, __m512i __B) { - //CHECK-LABEL: @test_mm512_mullo_epi32 + //CHECK-LABEL: test_mm512_mullo_epi32 //CHECK: mul <16 x i32> return _mm512_mullo_epi32(__A,__B); } __m512i test_mm512_mullox_epi64 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mullox_epi64 + // CHECK-LABEL: test_mm512_mullox_epi64 // CHECK: mul <8 x i64> return (__m512i) _mm512_mullox_epi64(__A, __B); } __m512i test_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_mullox_epi64 + // CHECK-LABEL: test_mm512_mask_mullox_epi64 // CHECK: mul <8 x i64> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return (__m512i) _mm512_mask_mullox_epi64(__W, __U, __A, __B); } __m512d test_mm512_add_round_pd(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_add_round_pd + // CHECK-LABEL: test_mm512_add_round_pd // CHECK: @llvm.x86.avx512.add.pd.512 return _mm512_add_round_pd(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_add_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_add_round_pd + // CHECK-LABEL: test_mm512_mask_add_round_pd 
// CHECK: @llvm.x86.avx512.add.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_add_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_add_round_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_add_round_pd + // CHECK-LABEL: test_mm512_maskz_add_round_pd // CHECK: @llvm.x86.avx512.add.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_add_round_pd(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_add_pd + // CHECK-LABEL: test_mm512_mask_add_pd // CHECK: fadd <8 x double> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_add_pd(__W,__U,__A,__B); } __m512d test_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_add_pd + // CHECK-LABEL: test_mm512_maskz_add_pd // CHECK: fadd <8 x double> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_add_pd(__U,__A,__B); } __m512 test_mm512_add_round_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_add_round_ps + // CHECK-LABEL: test_mm512_add_round_ps // CHECK: @llvm.x86.avx512.add.ps.512 return _mm512_add_round_ps(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_add_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_add_round_ps + // CHECK-LABEL: test_mm512_mask_add_round_ps // CHECK: @llvm.x86.avx512.add.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_add_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_add_round_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: 
@test_mm512_maskz_add_round_ps + // CHECK-LABEL: test_mm512_maskz_add_round_ps // CHECK: @llvm.x86.avx512.add.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_add_round_ps(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_add_ps + // CHECK-LABEL: test_mm512_mask_add_ps // CHECK: fadd <16 x float> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_add_ps(__W,__U,__A,__B); } __m512 test_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_add_ps + // CHECK-LABEL: test_mm512_maskz_add_ps // CHECK: fadd <16 x float> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_add_ps(__U,__A,__B); } __m128 test_mm_add_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_add_round_ss + // CHECK-LABEL: test_mm_add_round_ss // CHECK: @llvm.x86.avx512.mask.add.ss.round return _mm_add_round_ss(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_add_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_add_round_ss + // CHECK-LABEL: test_mm_mask_add_round_ss // CHECK: @llvm.x86.avx512.mask.add.ss.round return _mm_mask_add_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_add_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_add_round_ss + // CHECK-LABEL: test_mm_maskz_add_round_ss // CHECK: @llvm.x86.avx512.mask.add.ss.round return _mm_maskz_add_round_ss(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_add_ss + // CHECK-LABEL: test_mm_mask_add_ss // CHECK-NOT: 
@llvm.x86.avx512.mask.add.ss.round // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: extractelement <4 x float> %{{.*}}, i32 0 @@ -3162,7 +3164,7 @@ __m128 test_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return _mm_mask_add_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_add_ss + // CHECK-LABEL: test_mm_maskz_add_ss // CHECK-NOT: @llvm.x86.avx512.mask.add.ss.round // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: extractelement <4 x float> %{{.*}}, i32 0 @@ -3177,22 +3179,22 @@ __m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) { return _mm_maskz_add_ss(__U,__A,__B); } __m128d test_mm_add_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_add_round_sd + // CHECK-LABEL: test_mm_add_round_sd // CHECK: @llvm.x86.avx512.mask.add.sd.round return _mm_add_round_sd(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_add_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_add_round_sd + // CHECK-LABEL: test_mm_mask_add_round_sd // CHECK: @llvm.x86.avx512.mask.add.sd.round return _mm_mask_add_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_add_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_add_round_sd + // CHECK-LABEL: test_mm_maskz_add_round_sd // CHECK: @llvm.x86.avx512.mask.add.sd.round return _mm_maskz_add_round_sd(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_add_sd + // CHECK-LABEL: test_mm_mask_add_sd // CHECK-NOT: @llvm.x86.avx512.mask.add.sd.round // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -3207,7 +3209,7 @@ __m128d test_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 
return _mm_mask_add_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_add_sd + // CHECK-LABEL: test_mm_maskz_add_sd // CHECK-NOT: @llvm.x86.avx512.mask.add.sd.round // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -3222,80 +3224,80 @@ __m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) { return _mm_maskz_add_sd(__U,__A,__B); } __m512d test_mm512_sub_round_pd(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_sub_round_pd + // CHECK-LABEL: test_mm512_sub_round_pd // CHECK: @llvm.x86.avx512.sub.pd.512 return _mm512_sub_round_pd(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_sub_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_sub_round_pd + // CHECK-LABEL: test_mm512_mask_sub_round_pd // CHECK: @llvm.x86.avx512.sub.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_sub_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_sub_round_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_sub_round_pd + // CHECK-LABEL: test_mm512_maskz_sub_round_pd // CHECK: @llvm.x86.avx512.sub.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_sub_round_pd(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_sub_pd + // CHECK-LABEL: test_mm512_mask_sub_pd // CHECK: fsub <8 x double> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_sub_pd(__W,__U,__A,__B); } __m512d test_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_sub_pd + // CHECK-LABEL: 
test_mm512_maskz_sub_pd // CHECK: fsub <8 x double> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_sub_pd(__U,__A,__B); } __m512 test_mm512_sub_round_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_sub_round_ps + // CHECK-LABEL: test_mm512_sub_round_ps // CHECK: @llvm.x86.avx512.sub.ps.512 return _mm512_sub_round_ps(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_sub_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_sub_round_ps + // CHECK-LABEL: test_mm512_mask_sub_round_ps // CHECK: @llvm.x86.avx512.sub.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_sub_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_sub_round_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_sub_round_ps + // CHECK-LABEL: test_mm512_maskz_sub_round_ps // CHECK: @llvm.x86.avx512.sub.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_sub_round_ps(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_sub_ps + // CHECK-LABEL: test_mm512_mask_sub_ps // CHECK: fsub <16 x float> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_sub_ps(__W,__U,__A,__B); } __m512 test_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_sub_ps + // CHECK-LABEL: test_mm512_maskz_sub_ps // CHECK: fsub <16 x float> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_sub_ps(__U,__A,__B); } __m128 test_mm_sub_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_sub_round_ss + // CHECK-LABEL: 
test_mm_sub_round_ss // CHECK: @llvm.x86.avx512.mask.sub.ss.round return _mm_sub_round_ss(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_sub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_sub_round_ss + // CHECK-LABEL: test_mm_mask_sub_round_ss // CHECK: @llvm.x86.avx512.mask.sub.ss.round return _mm_mask_sub_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_sub_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_sub_round_ss + // CHECK-LABEL: test_mm_maskz_sub_round_ss // CHECK: @llvm.x86.avx512.mask.sub.ss.round return _mm_maskz_sub_round_ss(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_sub_ss + // CHECK-LABEL: test_mm_mask_sub_ss // CHECK-NOT: @llvm.x86.avx512.mask.sub.ss.round // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: extractelement <4 x float> %{{.*}}, i32 0 @@ -3310,7 +3312,7 @@ __m128 test_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return _mm_mask_sub_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_sub_ss + // CHECK-LABEL: test_mm_maskz_sub_ss // CHECK-NOT: @llvm.x86.avx512.mask.sub.ss.round // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: extractelement <4 x float> %{{.*}}, i32 0 @@ -3325,22 +3327,22 @@ __m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) { return _mm_maskz_sub_ss(__U,__A,__B); } __m128d test_mm_sub_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_sub_round_sd + // CHECK-LABEL: test_mm_sub_round_sd // CHECK: @llvm.x86.avx512.mask.sub.sd.round return _mm_sub_round_sd(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_sub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: 
@test_mm_mask_sub_round_sd + // CHECK-LABEL: test_mm_mask_sub_round_sd // CHECK: @llvm.x86.avx512.mask.sub.sd.round return _mm_mask_sub_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_sub_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_sub_round_sd + // CHECK-LABEL: test_mm_maskz_sub_round_sd // CHECK: @llvm.x86.avx512.mask.sub.sd.round return _mm_maskz_sub_round_sd(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_sub_sd + // CHECK-LABEL: test_mm_mask_sub_sd // CHECK-NOT: @llvm.x86.avx512.mask.sub.sd.round // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -3355,7 +3357,7 @@ __m128d test_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) return _mm_mask_sub_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_sub_sd + // CHECK-LABEL: test_mm_maskz_sub_sd // CHECK-NOT: @llvm.x86.avx512.mask.sub.sd.round // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -3370,80 +3372,80 @@ __m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) { return _mm_maskz_sub_sd(__U,__A,__B); } __m512d test_mm512_mul_round_pd(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mul_round_pd + // CHECK-LABEL: test_mm512_mul_round_pd // CHECK: @llvm.x86.avx512.mul.pd.512 return _mm512_mul_round_pd(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_mul_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_mul_round_pd + // CHECK-LABEL: test_mm512_mask_mul_round_pd // CHECK: @llvm.x86.avx512.mul.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return 
_mm512_mask_mul_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_mul_round_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_mul_round_pd + // CHECK-LABEL: test_mm512_maskz_mul_round_pd // CHECK: @llvm.x86.avx512.mul.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_mul_round_pd(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_mul_pd + // CHECK-LABEL: test_mm512_mask_mul_pd // CHECK: fmul <8 x double> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_mul_pd(__W,__U,__A,__B); } __m512d test_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_mul_pd + // CHECK-LABEL: test_mm512_maskz_mul_pd // CHECK: fmul <8 x double> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_mul_pd(__U,__A,__B); } __m512 test_mm512_mul_round_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mul_round_ps + // CHECK-LABEL: test_mm512_mul_round_ps // CHECK: @llvm.x86.avx512.mul.ps.512 return _mm512_mul_round_ps(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_mul_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_mul_round_ps + // CHECK-LABEL: test_mm512_mask_mul_round_ps // CHECK: @llvm.x86.avx512.mul.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_mul_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_mul_round_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_mul_round_ps + // CHECK-LABEL: test_mm512_maskz_mul_round_ps // CHECK: @llvm.x86.avx512.mul.ps.512 // 
CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_mul_round_ps(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_mul_ps + // CHECK-LABEL: test_mm512_mask_mul_ps // CHECK: fmul <16 x float> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_mul_ps(__W,__U,__A,__B); } __m512 test_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_mul_ps + // CHECK-LABEL: test_mm512_maskz_mul_ps // CHECK: fmul <16 x float> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_mul_ps(__U,__A,__B); } __m128 test_mm_mul_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mul_round_ss + // CHECK-LABEL: test_mm_mul_round_ss // CHECK: @llvm.x86.avx512.mask.mul.ss.round return _mm_mul_round_ss(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_mul_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_mul_round_ss + // CHECK-LABEL: test_mm_mask_mul_round_ss // CHECK: @llvm.x86.avx512.mask.mul.ss.round return _mm_mask_mul_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_mul_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_mul_round_ss + // CHECK-LABEL: test_mm_maskz_mul_round_ss // CHECK: @llvm.x86.avx512.mask.mul.ss.round return _mm_maskz_mul_round_ss(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_mul_ss + // CHECK-LABEL: test_mm_mask_mul_ss // CHECK-NOT: @llvm.x86.avx512.mask.mul.ss.round // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: extractelement <4 x float> %{{.*}}, i32 0 @@ -3458,7 
+3460,7 @@ __m128 test_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return _mm_mask_mul_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_mul_ss + // CHECK-LABEL: test_mm_maskz_mul_ss // CHECK-NOT: @llvm.x86.avx512.mask.mul.ss.round // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: extractelement <4 x float> %{{.*}}, i32 0 @@ -3473,22 +3475,22 @@ __m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) { return _mm_maskz_mul_ss(__U,__A,__B); } __m128d test_mm_mul_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mul_round_sd + // CHECK-LABEL: test_mm_mul_round_sd // CHECK: @llvm.x86.avx512.mask.mul.sd.round return _mm_mul_round_sd(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_mul_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_mul_round_sd + // CHECK-LABEL: test_mm_mask_mul_round_sd // CHECK: @llvm.x86.avx512.mask.mul.sd.round return _mm_mask_mul_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_mul_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_mul_round_sd + // CHECK-LABEL: test_mm_maskz_mul_round_sd // CHECK: @llvm.x86.avx512.mask.mul.sd.round return _mm_maskz_mul_round_sd(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_mul_sd + // CHECK-LABEL: test_mm_mask_mul_sd // CHECK-NOT: @llvm.x86.avx512.mask.mul.sd.round // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -3503,7 +3505,7 @@ __m128d test_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) return _mm_mask_mul_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: 
@test_mm_maskz_mul_sd + // CHECK-LABEL: test_mm_maskz_mul_sd // CHECK-NOT: @llvm.x86.avx512.mask.mul.sd.round // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 @@ -3518,90 +3520,90 @@ __m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) { return _mm_maskz_mul_sd(__U,__A,__B); } __m512d test_mm512_div_round_pd(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_div_round_pd + // CHECK-LABEL: test_mm512_div_round_pd // CHECK: @llvm.x86.avx512.div.pd.512 return _mm512_div_round_pd(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_div_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_div_round_pd + // CHECK-LABEL: test_mm512_mask_div_round_pd // CHECK: @llvm.x86.avx512.div.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_div_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_div_round_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_div_round_pd + // CHECK-LABEL: test_mm512_maskz_div_round_pd // CHECK: @llvm.x86.avx512.div.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_div_round_pd(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_div_pd(__m512d __a, __m512d __b) { - // CHECK-LABEL: @test_mm512_div_pd + // CHECK-LABEL: test_mm512_div_pd // CHECK: fdiv <8 x double> return _mm512_div_pd(__a,__b); } __m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d __b) { - // CHECK-LABEL: @test_mm512_mask_div_pd + // CHECK-LABEL: test_mm512_mask_div_pd // CHECK: fdiv <8 x double> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_div_pd(__w,__u,__a,__b); } __m512d test_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // 
CHECK-LABEL: @test_mm512_maskz_div_pd + // CHECK-LABEL: test_mm512_maskz_div_pd // CHECK: fdiv <8 x double> %{{.*}}, %{{.*}} // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_div_pd(__U,__A,__B); } __m512 test_mm512_div_round_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_div_round_ps + // CHECK-LABEL: test_mm512_div_round_ps // CHECK: @llvm.x86.avx512.div.ps.512 return _mm512_div_round_ps(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_div_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_div_round_ps + // CHECK-LABEL: test_mm512_mask_div_round_ps // CHECK: @llvm.x86.avx512.div.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_div_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_div_round_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_div_round_ps + // CHECK-LABEL: test_mm512_maskz_div_round_ps // CHECK: @llvm.x86.avx512.div.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_div_round_ps(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_div_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_div_ps + // CHECK-LABEL: test_mm512_div_ps // CHECK: fdiv <16 x float> return _mm512_div_ps(__A,__B); } __m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_div_ps + // CHECK-LABEL: test_mm512_mask_div_ps // CHECK: fdiv <16 x float> %{{.*}}, %{{.*}} // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_div_ps(__W,__U,__A,__B); } __m512 test_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_div_ps + // CHECK-LABEL: test_mm512_maskz_div_ps // CHECK: fdiv <16 x float> %{{.*}}, %{{.*}} 
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_div_ps(__U,__A,__B); } __m128 test_mm_div_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_div_round_ss + // CHECK-LABEL: test_mm_div_round_ss // CHECK: @llvm.x86.avx512.mask.div.ss.round return _mm_div_round_ss(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_div_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_div_round_ss + // CHECK-LABEL: test_mm_mask_div_round_ss // CHECK: @llvm.x86.avx512.mask.div.ss.round return _mm_mask_div_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_div_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_div_round_ss + // CHECK-LABEL: test_mm_maskz_div_round_ss // CHECK: @llvm.x86.avx512.mask.div.ss.round return _mm_maskz_div_round_ss(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_div_ss + // CHECK-LABEL: test_mm_mask_div_ss // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: fdiv float %{{.*}}, %{{.*}} @@ -3615,7 +3617,7 @@ __m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return _mm_mask_div_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_div_ss + // CHECK-LABEL: test_mm_maskz_div_ss // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: fdiv float %{{.*}}, %{{.*}} @@ -3629,22 +3631,22 @@ __m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) { return _mm_maskz_div_ss(__U,__A,__B); } __m128d test_mm_div_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_div_round_sd + // CHECK-LABEL: test_mm_div_round_sd // CHECK: 
@llvm.x86.avx512.mask.div.sd.round return _mm_div_round_sd(__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_div_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_div_round_sd + // CHECK-LABEL: test_mm_mask_div_round_sd // CHECK: @llvm.x86.avx512.mask.div.sd.round return _mm_mask_div_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_div_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_div_round_sd + // CHECK-LABEL: test_mm_maskz_div_round_sd // CHECK: @llvm.x86.avx512.mask.div.sd.round return _mm_maskz_div_round_sd(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_div_sd + // CHECK-LABEL: test_mm_mask_div_sd // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: fdiv double %{{.*}}, %{{.*}} @@ -3658,7 +3660,7 @@ __m128d test_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) return _mm_mask_div_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_div_sd + // CHECK-LABEL: test_mm_maskz_div_sd // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: fdiv double %{{.*}}, %{{.*}} @@ -3672,325 +3674,325 @@ __m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) { return _mm_maskz_div_sd(__U,__A,__B); } __m128 test_mm_max_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_max_round_ss + // CHECK-LABEL: test_mm_max_round_ss // CHECK: @llvm.x86.avx512.mask.max.ss.round return _mm_max_round_ss(__A,__B,0x08); } __m128 test_mm_mask_max_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_max_round_ss + // CHECK-LABEL: test_mm_mask_max_round_ss // 
CHECK: @llvm.x86.avx512.mask.max.ss.round return _mm_mask_max_round_ss(__W,__U,__A,__B,0x08); } __m128 test_mm_maskz_max_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_max_round_ss + // CHECK-LABEL: test_mm_maskz_max_round_ss // CHECK: @llvm.x86.avx512.mask.max.ss.round return _mm_maskz_max_round_ss(__U,__A,__B,0x08); } __m128 test_mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_max_ss + // CHECK-LABEL: test_mm_mask_max_ss // CHECK: @llvm.x86.avx512.mask.max.ss.round return _mm_mask_max_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_max_ss + // CHECK-LABEL: test_mm_maskz_max_ss // CHECK: @llvm.x86.avx512.mask.max.ss.round return _mm_maskz_max_ss(__U,__A,__B); } __m128d test_mm_max_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_max_round_sd + // CHECK-LABEL: test_mm_max_round_sd // CHECK: @llvm.x86.avx512.mask.max.sd.round return _mm_max_round_sd(__A,__B,0x08); } __m128d test_mm_mask_max_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_max_round_sd + // CHECK-LABEL: test_mm_mask_max_round_sd // CHECK: @llvm.x86.avx512.mask.max.sd.round return _mm_mask_max_round_sd(__W,__U,__A,__B,0x08); } __m128d test_mm_maskz_max_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_max_round_sd + // CHECK-LABEL: test_mm_maskz_max_round_sd // CHECK: @llvm.x86.avx512.mask.max.sd.round return _mm_maskz_max_round_sd(__U,__A,__B,0x08); } __m128d test_mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_max_sd + // CHECK-LABEL: test_mm_mask_max_sd // CHECK: @llvm.x86.avx512.mask.max.sd.round return _mm_mask_max_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_max_sd + // CHECK-LABEL: test_mm_maskz_max_sd // 
CHECK: @llvm.x86.avx512.mask.max.sd.round return _mm_maskz_max_sd(__U,__A,__B); } __m128 test_mm_min_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_min_round_ss + // CHECK-LABEL: test_mm_min_round_ss // CHECK: @llvm.x86.avx512.mask.min.ss.round return _mm_min_round_ss(__A,__B,0x08); } __m128 test_mm_mask_min_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_min_round_ss + // CHECK-LABEL: test_mm_mask_min_round_ss // CHECK: @llvm.x86.avx512.mask.min.ss.round return _mm_mask_min_round_ss(__W,__U,__A,__B,0x08); } __m128 test_mm_maskz_min_round_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_min_round_ss + // CHECK-LABEL: test_mm_maskz_min_round_ss // CHECK: @llvm.x86.avx512.mask.min.ss.round return _mm_maskz_min_round_ss(__U,__A,__B,0x08); } __m128 test_mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_min_ss + // CHECK-LABEL: test_mm_mask_min_ss // CHECK: @llvm.x86.avx512.mask.min.ss.round return _mm_mask_min_ss(__W,__U,__A,__B); } __m128 test_mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_min_ss + // CHECK-LABEL: test_mm_maskz_min_ss // CHECK: @llvm.x86.avx512.mask.min.ss.round return _mm_maskz_min_ss(__U,__A,__B); } __m128d test_mm_min_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_min_round_sd + // CHECK-LABEL: test_mm_min_round_sd // CHECK: @llvm.x86.avx512.mask.min.sd.round return _mm_min_round_sd(__A,__B,0x08); } __m128d test_mm_mask_min_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_min_round_sd + // CHECK-LABEL: test_mm_mask_min_round_sd // CHECK: @llvm.x86.avx512.mask.min.sd.round return _mm_mask_min_round_sd(__W,__U,__A,__B,0x08); } __m128d test_mm_maskz_min_round_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_min_round_sd + // CHECK-LABEL: test_mm_maskz_min_round_sd // CHECK: 
@llvm.x86.avx512.mask.min.sd.round return _mm_maskz_min_round_sd(__U,__A,__B,0x08); } __m128d test_mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_min_sd + // CHECK-LABEL: test_mm_mask_min_sd // CHECK: @llvm.x86.avx512.mask.min.sd.round return _mm_mask_min_sd(__W,__U,__A,__B); } __m128d test_mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_min_sd + // CHECK-LABEL: test_mm_maskz_min_sd // CHECK: @llvm.x86.avx512.mask.min.sd.round return _mm_maskz_min_sd(__U,__A,__B); } __m512 test_mm512_undefined(void) { - // CHECK-LABEL: @test_mm512_undefined + // CHECK-LABEL: test_mm512_undefined // CHECK: ret <16 x float> zeroinitializer return _mm512_undefined(); } __m512 test_mm512_undefined_ps(void) { - // CHECK-LABEL: @test_mm512_undefined_ps + // CHECK-LABEL: test_mm512_undefined_ps // CHECK: ret <16 x float> zeroinitializer return _mm512_undefined_ps(); } __m512d test_mm512_undefined_pd(void) { - // CHECK-LABEL: @test_mm512_undefined_pd + // CHECK-LABEL: test_mm512_undefined_pd // CHECK: ret <8 x double> zeroinitializer return _mm512_undefined_pd(); } __m512i test_mm512_undefined_epi32(void) { - // CHECK-LABEL: @test_mm512_undefined_epi32 + // CHECK-LABEL: test_mm512_undefined_epi32 // CHECK: ret <8 x i64> zeroinitializer return _mm512_undefined_epi32(); } __m512i test_mm512_cvtepi8_epi32(__m128i __A) { - // CHECK-LABEL: @test_mm512_cvtepi8_epi32 + // CHECK-LABEL: test_mm512_cvtepi8_epi32 // CHECK: sext <16 x i8> %{{.*}} to <16 x i32> return _mm512_cvtepi8_epi32(__A); } __m512i test_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi8_epi32 + // CHECK-LABEL: test_mm512_mask_cvtepi8_epi32 // CHECK: sext <16 x i8> %{{.*}} to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_cvtepi8_epi32(__W, __U, __A); } __m512i test_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) 
{ - // CHECK-LABEL: @test_mm512_maskz_cvtepi8_epi32 + // CHECK-LABEL: test_mm512_maskz_cvtepi8_epi32 // CHECK: sext <16 x i8> %{{.*}} to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_cvtepi8_epi32(__U, __A); } __m512i test_mm512_cvtepi8_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm512_cvtepi8_epi64 + // CHECK-LABEL: test_mm512_cvtepi8_epi64 // CHECK: sext <8 x i8> %{{.*}} to <8 x i64> return _mm512_cvtepi8_epi64(__A); } __m512i test_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi8_epi64 + // CHECK-LABEL: test_mm512_mask_cvtepi8_epi64 // CHECK: sext <8 x i8> %{{.*}} to <8 x i64> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_cvtepi8_epi64(__W, __U, __A); } __m512i test_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi8_epi64 + // CHECK-LABEL: test_mm512_maskz_cvtepi8_epi64 // CHECK: sext <8 x i8> %{{.*}} to <8 x i64> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_cvtepi8_epi64(__U, __A); } __m512i test_mm512_cvtepi32_epi64(__m256i __X) { - // CHECK-LABEL: @test_mm512_cvtepi32_epi64 + // CHECK-LABEL: test_mm512_cvtepi32_epi64 // CHECK: sext <8 x i32> %{{.*}} to <8 x i64> return _mm512_cvtepi32_epi64(__X); } __m512i test_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_epi64 + // CHECK-LABEL: test_mm512_mask_cvtepi32_epi64 // CHECK: sext <8 x i32> %{{.*}} to <8 x i64> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_cvtepi32_epi64(__W, __U, __X); } __m512i test_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi32_epi64 + // CHECK-LABEL: test_mm512_maskz_cvtepi32_epi64 // CHECK: sext <8 x i32> %{{.*}} to <8 x i64> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> 
%{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_cvtepi32_epi64(__U, __X); } __m512i test_mm512_cvtepi16_epi32(__m256i __A) { - // CHECK-LABEL: @test_mm512_cvtepi16_epi32 + // CHECK-LABEL: test_mm512_cvtepi16_epi32 // CHECK: sext <16 x i16> %{{.*}} to <16 x i32> return _mm512_cvtepi16_epi32(__A); } __m512i test_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi16_epi32 + // CHECK-LABEL: test_mm512_mask_cvtepi16_epi32 // CHECK: sext <16 x i16> %{{.*}} to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_cvtepi16_epi32(__W, __U, __A); } __m512i test_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi16_epi32 + // CHECK-LABEL: test_mm512_maskz_cvtepi16_epi32 // CHECK: sext <16 x i16> %{{.*}} to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_cvtepi16_epi32(__U, __A); } __m512i test_mm512_cvtepi16_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm512_cvtepi16_epi64 + // CHECK-LABEL: test_mm512_cvtepi16_epi64 // CHECK: sext <8 x i16> %{{.*}} to <8 x i64> return _mm512_cvtepi16_epi64(__A); } __m512i test_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi16_epi64 + // CHECK-LABEL: test_mm512_mask_cvtepi16_epi64 // CHECK: sext <8 x i16> %{{.*}} to <8 x i64> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_cvtepi16_epi64(__W, __U, __A); } __m512i test_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi16_epi64 + // CHECK-LABEL: test_mm512_maskz_cvtepi16_epi64 // CHECK: sext <8 x i16> %{{.*}} to <8 x i64> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_cvtepi16_epi64(__U, __A); } __m512i test_mm512_cvtepu8_epi32(__m128i __A) { - // CHECK-LABEL: @test_mm512_cvtepu8_epi32 + // 
CHECK-LABEL: test_mm512_cvtepu8_epi32 // CHECK: zext <16 x i8> %{{.*}} to <16 x i32> return _mm512_cvtepu8_epi32(__A); } __m512i test_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu8_epi32 + // CHECK-LABEL: test_mm512_mask_cvtepu8_epi32 // CHECK: zext <16 x i8> %{{.*}} to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_cvtepu8_epi32(__W, __U, __A); } __m512i test_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu8_epi32 + // CHECK-LABEL: test_mm512_maskz_cvtepu8_epi32 // CHECK: zext <16 x i8> %{{.*}} to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_cvtepu8_epi32(__U, __A); } __m512i test_mm512_cvtepu8_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm512_cvtepu8_epi64 + // CHECK-LABEL: test_mm512_cvtepu8_epi64 // CHECK: zext <8 x i8> %{{.*}} to <8 x i64> return _mm512_cvtepu8_epi64(__A); } __m512i test_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu8_epi64 + // CHECK-LABEL: test_mm512_mask_cvtepu8_epi64 // CHECK: zext <8 x i8> %{{.*}} to <8 x i64> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_cvtepu8_epi64(__W, __U, __A); } __m512i test_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu8_epi64 + // CHECK-LABEL: test_mm512_maskz_cvtepu8_epi64 // CHECK: zext <8 x i8> %{{.*}} to <8 x i64> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_cvtepu8_epi64(__U, __A); } __m512i test_mm512_cvtepu32_epi64(__m256i __X) { - // CHECK-LABEL: @test_mm512_cvtepu32_epi64 + // CHECK-LABEL: test_mm512_cvtepu32_epi64 // CHECK: zext <8 x i32> %{{.*}} to <8 x i64> return _mm512_cvtepu32_epi64(__X); } __m512i test_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i 
__X) { - // CHECK-LABEL: @test_mm512_mask_cvtepu32_epi64 + // CHECK-LABEL: test_mm512_mask_cvtepu32_epi64 // CHECK: zext <8 x i32> %{{.*}} to <8 x i64> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_cvtepu32_epi64(__W, __U, __X); } __m512i test_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu32_epi64 + // CHECK-LABEL: test_mm512_maskz_cvtepu32_epi64 // CHECK: zext <8 x i32> %{{.*}} to <8 x i64> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_cvtepu32_epi64(__U, __X); } __m512i test_mm512_cvtepu16_epi32(__m256i __A) { - // CHECK-LABEL: @test_mm512_cvtepu16_epi32 + // CHECK-LABEL: test_mm512_cvtepu16_epi32 // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> return _mm512_cvtepu16_epi32(__A); } __m512i test_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu16_epi32 + // CHECK-LABEL: test_mm512_mask_cvtepu16_epi32 // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_cvtepu16_epi32(__W, __U, __A); } __m512i test_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu16_epi32 + // CHECK-LABEL: test_mm512_maskz_cvtepu16_epi32 // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_cvtepu16_epi32(__U, __A); } __m512i test_mm512_cvtepu16_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm512_cvtepu16_epi64 + // CHECK-LABEL: test_mm512_cvtepu16_epi64 // CHECK: zext <8 x i16> %{{.*}} to <8 x i64> return _mm512_cvtepu16_epi64(__A); } __m512i test_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu16_epi64 + // CHECK-LABEL: test_mm512_mask_cvtepu16_epi64 // CHECK: zext <8 x i16> %{{.*}} to <8 x i64> // CHECK: select <8 
x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_cvtepu16_epi64(__W, __U, __A); } __m512i test_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu16_epi64 + // CHECK-LABEL: test_mm512_maskz_cvtepu16_epi64 // CHECK: zext <8 x i16> %{{.*}} to <8 x i64> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_cvtepu16_epi64(__U, __A); @@ -3998,120 +4000,120 @@ __m512i test_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { __m512i test_mm512_rol_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_rol_epi32 + // CHECK-LABEL: test_mm512_rol_epi32 // CHECK: @llvm.fshl.v16i32 return _mm512_rol_epi32(__A, 5); } __m512i test_mm512_mask_rol_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_rol_epi32 + // CHECK-LABEL: test_mm512_mask_rol_epi32 // CHECK: @llvm.fshl.v16i32 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_rol_epi32(__W, __U, __A, 5); } __m512i test_mm512_maskz_rol_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_rol_epi32 + // CHECK-LABEL: test_mm512_maskz_rol_epi32 // CHECK: @llvm.fshl.v16i32 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_rol_epi32(__U, __A, 5); } __m512i test_mm512_rol_epi64(__m512i __A) { - // CHECK-LABEL: @test_mm512_rol_epi64 + // CHECK-LABEL: test_mm512_rol_epi64 // CHECK: @llvm.fshl.v8i64 return _mm512_rol_epi64(__A, 5); } __m512i test_mm512_mask_rol_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_rol_epi64 + // CHECK-LABEL: test_mm512_mask_rol_epi64 // CHECK: @llvm.fshl.v8i64 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_rol_epi64(__W, __U, __A, 5); } __m512i test_mm512_maskz_rol_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_rol_epi64 + // CHECK-LABEL: test_mm512_maskz_rol_epi64 // 
CHECK: @llvm.fshl.v8i64 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_rol_epi64(__U, __A, 5); } __m512i test_mm512_rolv_epi32(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_rolv_epi32 + // CHECK-LABEL: test_mm512_rolv_epi32 // CHECK: @llvm.fshl.v16i32 return _mm512_rolv_epi32(__A, __B); } __m512i test_mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_rolv_epi32 + // CHECK-LABEL: test_mm512_mask_rolv_epi32 // CHECK: @llvm.fshl.v16i32 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_rolv_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_rolv_epi32 + // CHECK-LABEL: test_mm512_maskz_rolv_epi32 // CHECK: @llvm.fshl.v16i32 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_rolv_epi32(__U, __A, __B); } __m512i test_mm512_rolv_epi64(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_rolv_epi64 + // CHECK-LABEL: test_mm512_rolv_epi64 // CHECK: @llvm.fshl.v8i64 return _mm512_rolv_epi64(__A, __B); } __m512i test_mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_rolv_epi64 + // CHECK-LABEL: test_mm512_mask_rolv_epi64 // CHECK: @llvm.fshl.v8i64 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_rolv_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_rolv_epi64 + // CHECK-LABEL: test_mm512_maskz_rolv_epi64 // CHECK: @llvm.fshl.v8i64 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_rolv_epi64(__U, __A, __B); } __m512i test_mm512_ror_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_ror_epi32 + // CHECK-LABEL: test_mm512_ror_epi32 // CHECK: 
@llvm.fshr.v16i32 return _mm512_ror_epi32(__A, 5); } __m512i test_mm512_mask_ror_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_ror_epi32 + // CHECK-LABEL: test_mm512_mask_ror_epi32 // CHECK: @llvm.fshr.v16i32 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_ror_epi32(__W, __U, __A, 5); } __m512i test_mm512_maskz_ror_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_ror_epi32 + // CHECK-LABEL: test_mm512_maskz_ror_epi32 // CHECK: @llvm.fshr.v16i32 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_ror_epi32(__U, __A, 5); } __m512i test_mm512_ror_epi64(__m512i __A) { - // CHECK-LABEL: @test_mm512_ror_epi64 + // CHECK-LABEL: test_mm512_ror_epi64 // CHECK: @llvm.fshr.v8i64 return _mm512_ror_epi64(__A, 5); } __m512i test_mm512_mask_ror_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_ror_epi64 + // CHECK-LABEL: test_mm512_mask_ror_epi64 // CHECK: @llvm.fshr.v8i64 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_ror_epi64(__W, __U, __A, 5); } __m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_ror_epi64 + // CHECK-LABEL: test_mm512_maskz_ror_epi64 // CHECK: @llvm.fshr.v8i64 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_ror_epi64(__U, __A, 5); @@ -4119,607 +4121,607 @@ __m512i test_mm512_maskz_ror_epi64(__mmask8 __U, __m512i __A) { __m512i test_mm512_rorv_epi32(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_rorv_epi32 + // CHECK-LABEL: test_mm512_rorv_epi32 // CHECK: @llvm.fshr.v16i32 return _mm512_rorv_epi32(__A, __B); } __m512i test_mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_rorv_epi32 + // CHECK-LABEL: test_mm512_mask_rorv_epi32 // CHECK: @llvm.fshr.v16i32 // CHECK: select <16 
x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_rorv_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_rorv_epi32 + // CHECK-LABEL: test_mm512_maskz_rorv_epi32 // CHECK: @llvm.fshr.v16i32 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_rorv_epi32(__U, __A, __B); } __m512i test_mm512_rorv_epi64(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_rorv_epi64 + // CHECK-LABEL: test_mm512_rorv_epi64 // CHECK: @llvm.fshr.v8i64 return _mm512_rorv_epi64(__A, __B); } __m512i test_mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_rorv_epi64 + // CHECK-LABEL: test_mm512_mask_rorv_epi64 // CHECK: @llvm.fshr.v8i64 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_rorv_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_rorv_epi64 + // CHECK-LABEL: test_mm512_maskz_rorv_epi64 // CHECK: @llvm.fshr.v8i64 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_rorv_epi64(__U, __A, __B); } __m512i test_mm512_slli_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_slli_epi32 + // CHECK-LABEL: test_mm512_slli_epi32 // CHECK: @llvm.x86.avx512.pslli.d.512 return _mm512_slli_epi32(__A, 5); } __m512i test_mm512_slli_epi32_2(__m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_slli_epi32_2 + // CHECK-LABEL: test_mm512_slli_epi32_2 // CHECK: @llvm.x86.avx512.pslli.d.512 return _mm512_slli_epi32(__A, __B); } __m512i test_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_slli_epi32 + // CHECK-LABEL: test_mm512_mask_slli_epi32 // CHECK: @llvm.x86.avx512.pslli.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} 
return _mm512_mask_slli_epi32(__W, __U, __A, 5); } __m512i test_mm512_mask_slli_epi32_2(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_mask_slli_epi32_2 + // CHECK-LABEL: test_mm512_mask_slli_epi32_2 // CHECK: @llvm.x86.avx512.pslli.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_slli_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_slli_epi32 + // CHECK-LABEL: test_mm512_maskz_slli_epi32 // CHECK: @llvm.x86.avx512.pslli.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_slli_epi32(__U, __A, 5); } __m512i test_mm512_maskz_slli_epi32_2(__mmask16 __U, __m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_maskz_slli_epi32_2 + // CHECK-LABEL: test_mm512_maskz_slli_epi32_2 // CHECK: @llvm.x86.avx512.pslli.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_slli_epi32(__U, __A, __B); } __m512i test_mm512_slli_epi64(__m512i __A) { - // CHECK-LABEL: @test_mm512_slli_epi64 + // CHECK-LABEL: test_mm512_slli_epi64 // CHECK: @llvm.x86.avx512.pslli.q.512 return _mm512_slli_epi64(__A, 5); } __m512i test_mm512_slli_epi64_2(__m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_slli_epi64_2 + // CHECK-LABEL: test_mm512_slli_epi64_2 // CHECK: @llvm.x86.avx512.pslli.q.512 return _mm512_slli_epi64(__A, __B); } __m512i test_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_slli_epi64 + // CHECK-LABEL: test_mm512_mask_slli_epi64 // CHECK: @llvm.x86.avx512.pslli.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_slli_epi64(__W, __U, __A, 5); } __m512i test_mm512_mask_slli_epi64_2(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_mask_slli_epi64_2 + // CHECK-LABEL: 
test_mm512_mask_slli_epi64_2 // CHECK: @llvm.x86.avx512.pslli.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_slli_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_slli_epi64 + // CHECK-LABEL: test_mm512_maskz_slli_epi64 // CHECK: @llvm.x86.avx512.pslli.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_slli_epi64(__U, __A, 5); } __m512i test_mm512_maskz_slli_epi64_2(__mmask8 __U, __m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_maskz_slli_epi64_2 + // CHECK-LABEL: test_mm512_maskz_slli_epi64_2 // CHECK: @llvm.x86.avx512.pslli.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_slli_epi64(__U, __A, __B); } __m512i test_mm512_srli_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_srli_epi32 + // CHECK-LABEL: test_mm512_srli_epi32 // CHECK: @llvm.x86.avx512.psrli.d.512 return _mm512_srli_epi32(__A, 5); } __m512i test_mm512_srli_epi32_2(__m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_srli_epi32_2 + // CHECK-LABEL: test_mm512_srli_epi32_2 // CHECK: @llvm.x86.avx512.psrli.d.512 return _mm512_srli_epi32(__A, __B); } __m512i test_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_srli_epi32 + // CHECK-LABEL: test_mm512_mask_srli_epi32 // CHECK: @llvm.x86.avx512.psrli.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srli_epi32(__W, __U, __A, 5); } __m512i test_mm512_mask_srli_epi32_2(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_mask_srli_epi32_2 + // CHECK-LABEL: test_mm512_mask_srli_epi32_2 // CHECK: @llvm.x86.avx512.psrli.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srli_epi32(__W, __U, __A, __B); } __m512i 
test_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_srli_epi32 + // CHECK-LABEL: test_mm512_maskz_srli_epi32 // CHECK: @llvm.x86.avx512.psrli.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srli_epi32(__U, __A, 5); } __m512i test_mm512_maskz_srli_epi32_2(__mmask16 __U, __m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_maskz_srli_epi32_2 + // CHECK-LABEL: test_mm512_maskz_srli_epi32_2 // CHECK: @llvm.x86.avx512.psrli.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srli_epi32(__U, __A, __B); } __m512i test_mm512_srli_epi64(__m512i __A) { - // CHECK-LABEL: @test_mm512_srli_epi64 + // CHECK-LABEL: test_mm512_srli_epi64 // CHECK: @llvm.x86.avx512.psrli.q.512 return _mm512_srli_epi64(__A, 5); } __m512i test_mm512_srli_epi64_2(__m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_srli_epi64_2 + // CHECK-LABEL: test_mm512_srli_epi64_2 // CHECK: @llvm.x86.avx512.psrli.q.512 return _mm512_srli_epi64(__A, __B); } __m512i test_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_srli_epi64 + // CHECK-LABEL: test_mm512_mask_srli_epi64 // CHECK: @llvm.x86.avx512.psrli.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srli_epi64(__W, __U, __A, 5); } __m512i test_mm512_mask_srli_epi64_2(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_mask_srli_epi64_2 + // CHECK-LABEL: test_mm512_mask_srli_epi64_2 // CHECK: @llvm.x86.avx512.psrli.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srli_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_srli_epi64 + // CHECK-LABEL: test_mm512_maskz_srli_epi64 // CHECK: @llvm.x86.avx512.psrli.q.512 // CHECK: select <8 x i1> 
%{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srli_epi64(__U, __A, 5); } __m512i test_mm512_maskz_srli_epi64_2(__mmask8 __U, __m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_maskz_srli_epi64_2 + // CHECK-LABEL: test_mm512_maskz_srli_epi64_2 // CHECK: @llvm.x86.avx512.psrli.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srli_epi64(__U, __A, __B); } __m512i test_mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_mask_load_epi32 + // CHECK-LABEL: test_mm512_mask_load_epi32 // CHECK: @llvm.masked.load.v16i32.p0(ptr %{{.*}}, i32 64, <16 x i1> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_mask_load_epi32(__W, __U, __P); } __m512i test_mm512_maskz_load_epi32(__mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_maskz_load_epi32 + // CHECK-LABEL: test_mm512_maskz_load_epi32 // CHECK: @llvm.masked.load.v16i32.p0(ptr %{{.*}}, i32 64, <16 x i1> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_maskz_load_epi32(__U, __P); } __m512i test_mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_mov_epi32 + // CHECK-LABEL: test_mm512_mask_mov_epi32 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_mov_epi32(__W, __U, __A); } __m512i test_mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_mov_epi32 + // CHECK-LABEL: test_mm512_maskz_mov_epi32 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_mov_epi32(__U, __A); } __m512i test_mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_mov_epi64 + // CHECK-LABEL: test_mm512_mask_mov_epi64 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_mov_epi64(__W, __U, __A); } __m512i test_mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: 
@test_mm512_maskz_mov_epi64 + // CHECK-LABEL: test_mm512_maskz_mov_epi64 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_mov_epi64(__U, __A); } __m512i test_mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_mask_load_epi64 + // CHECK-LABEL: test_mm512_mask_load_epi64 // CHECK: @llvm.masked.load.v8i64.p0(ptr %{{.*}}, i32 64, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_mask_load_epi64(__W, __U, __P); } __m512i test_mm512_maskz_load_epi64(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_maskz_load_epi64 + // CHECK-LABEL: test_mm512_maskz_load_epi64 // CHECK: @llvm.masked.load.v8i64.p0(ptr %{{.*}}, i32 64, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_maskz_load_epi64(__U, __P); } void test_mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_store_epi32 + // CHECK-LABEL: test_mm512_mask_store_epi32 // CHECK: @llvm.masked.store.v16i32.p0(<16 x i32> %{{.*}}, ptr %{{.*}}, i32 64, <16 x i1> %{{.*}}) return _mm512_mask_store_epi32(__P, __U, __A); } void test_mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_store_epi64 + // CHECK-LABEL: test_mm512_mask_store_epi64 // CHECK: @llvm.masked.store.v8i64.p0(<8 x i64> %{{.*}}, ptr %{{.*}}, i32 64, <8 x i1> %{{.*}}) return _mm512_mask_store_epi64(__P, __U, __A); } __m512d test_mm512_movedup_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_movedup_pd + // CHECK-LABEL: test_mm512_movedup_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> return _mm512_movedup_pd(__A); } __m512d test_mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_movedup_pd + // CHECK-LABEL: test_mm512_mask_movedup_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return 
_mm512_mask_movedup_pd(__W, __U, __A); } __m512d test_mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_movedup_pd + // CHECK-LABEL: test_mm512_maskz_movedup_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_movedup_pd(__U, __A); } int test_mm_comi_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_comi_round_sd + // CHECK-LABEL: test_mm_comi_round_sd // CHECK: @llvm.x86.avx512.vcomi.sd return _mm_comi_round_sd(__A, __B, 5, _MM_FROUND_NO_EXC); } int test_mm_comi_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_comi_round_ss + // CHECK-LABEL: test_mm_comi_round_ss // CHECK: @llvm.x86.avx512.vcomi.ss return _mm_comi_round_ss(__A, __B, 5, _MM_FROUND_NO_EXC); } __m512d test_mm512_fixupimm_round_pd(__m512d __A, __m512d __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_fixupimm_round_pd + // CHECK-LABEL: test_mm512_fixupimm_round_pd // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.512 return _mm512_fixupimm_round_pd(__A, __B, __C, 5, 8); } __m512d test_mm512_mask_fixupimm_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_fixupimm_round_pd + // CHECK-LABEL: test_mm512_mask_fixupimm_round_pd // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.512 return _mm512_mask_fixupimm_round_pd(__A, __U, __B, __C, 5, 8); } __m512d test_mm512_fixupimm_pd(__m512d __A, __m512d __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_fixupimm_pd + // CHECK-LABEL: test_mm512_fixupimm_pd // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.512 return _mm512_fixupimm_pd(__A, __B, __C, 5); } __m512d test_mm512_mask_fixupimm_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_fixupimm_pd + // CHECK-LABEL: test_mm512_mask_fixupimm_pd // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.512 return _mm512_mask_fixupimm_pd(__A, __U, __B, __C, 5); } __m512d 
test_mm512_maskz_fixupimm_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_fixupimm_round_pd + // CHECK-LABEL: test_mm512_maskz_fixupimm_round_pd // CHECK: @llvm.x86.avx512.maskz.fixupimm.pd.512 return _mm512_maskz_fixupimm_round_pd(__U, __A, __B, __C, 5, 8); } __m512d test_mm512_maskz_fixupimm_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_fixupimm_pd + // CHECK-LABEL: test_mm512_maskz_fixupimm_pd // CHECK: @llvm.x86.avx512.maskz.fixupimm.pd.512 return _mm512_maskz_fixupimm_pd(__U, __A, __B, __C, 5); } __m512 test_mm512_fixupimm_round_ps(__m512 __A, __m512 __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_fixupimm_round_ps + // CHECK-LABEL: test_mm512_fixupimm_round_ps // CHECK: @llvm.x86.avx512.mask.fixupimm.ps.512 return _mm512_fixupimm_round_ps(__A, __B, __C, 5, 8); } __m512 test_mm512_mask_fixupimm_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_fixupimm_round_ps + // CHECK-LABEL: test_mm512_mask_fixupimm_round_ps // CHECK: @llvm.x86.avx512.mask.fixupimm.ps.512 return _mm512_mask_fixupimm_round_ps(__A, __U, __B, __C, 5, 8); } __m512 test_mm512_fixupimm_ps(__m512 __A, __m512 __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_fixupimm_ps + // CHECK-LABEL: test_mm512_fixupimm_ps // CHECK: @llvm.x86.avx512.mask.fixupimm.ps.512 return _mm512_fixupimm_ps(__A, __B, __C, 5); } __m512 test_mm512_mask_fixupimm_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_fixupimm_ps + // CHECK-LABEL: test_mm512_mask_fixupimm_ps // CHECK: @llvm.x86.avx512.mask.fixupimm.ps.512 return _mm512_mask_fixupimm_ps(__A, __U, __B, __C, 5); } __m512 test_mm512_maskz_fixupimm_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_fixupimm_round_ps + // CHECK-LABEL: test_mm512_maskz_fixupimm_round_ps // CHECK: @llvm.x86.avx512.maskz.fixupimm.ps.512 return 
_mm512_maskz_fixupimm_round_ps(__U, __A, __B, __C, 5, 8); } __m512 test_mm512_maskz_fixupimm_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_fixupimm_ps + // CHECK-LABEL: test_mm512_maskz_fixupimm_ps // CHECK: @llvm.x86.avx512.maskz.fixupimm.ps.512 return _mm512_maskz_fixupimm_ps(__U, __A, __B, __C, 5); } __m128d test_mm_fixupimm_round_sd(__m128d __A, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_fixupimm_round_sd + // CHECK-LABEL: test_mm_fixupimm_round_sd // CHECK: @llvm.x86.avx512.mask.fixupimm return _mm_fixupimm_round_sd(__A, __B, __C, 5, 8); } __m128d test_mm_mask_fixupimm_round_sd(__m128d __A, __mmask8 __U, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_fixupimm_round_sd + // CHECK-LABEL: test_mm_mask_fixupimm_round_sd // CHECK: @llvm.x86.avx512.mask.fixupimm return _mm_mask_fixupimm_round_sd(__A, __U, __B, __C, 5, 8); } __m128d test_mm_fixupimm_sd(__m128d __A, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_fixupimm_sd + // CHECK-LABEL: test_mm_fixupimm_sd // CHECK: @llvm.x86.avx512.mask.fixupimm return _mm_fixupimm_sd(__A, __B, __C, 5); } __m128d test_mm_mask_fixupimm_sd(__m128d __A, __mmask8 __U, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_fixupimm_sd + // CHECK-LABEL: test_mm_mask_fixupimm_sd // CHECK: @llvm.x86.avx512.mask.fixupimm return _mm_mask_fixupimm_sd(__A, __U, __B, __C, 5); } __m128d test_mm_maskz_fixupimm_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_fixupimm_round_sd + // CHECK-LABEL: test_mm_maskz_fixupimm_round_sd // CHECK: @llvm.x86.avx512.maskz.fixupimm return _mm_maskz_fixupimm_round_sd(__U, __A, __B, __C, 5, 8); } __m128d test_mm_maskz_fixupimm_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_fixupimm_sd + // CHECK-LABEL: test_mm_maskz_fixupimm_sd // CHECK: @llvm.x86.avx512.maskz.fixupimm return _mm_maskz_fixupimm_sd(__U, __A, __B, __C, 5); } __m128 
test_mm_fixupimm_round_ss(__m128 __A, __m128 __B, __m128i __C) { - // CHECK-LABEL: @test_mm_fixupimm_round_ss + // CHECK-LABEL: test_mm_fixupimm_round_ss // CHECK: @llvm.x86.avx512.mask.fixupimm return _mm_fixupimm_round_ss(__A, __B, __C, 5, 8); } __m128 test_mm_mask_fixupimm_round_ss(__m128 __A, __mmask8 __U, __m128 __B, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_fixupimm_round_ss + // CHECK-LABEL: test_mm_mask_fixupimm_round_ss // CHECK: @llvm.x86.avx512.mask.fixupimm return _mm_mask_fixupimm_round_ss(__A, __U, __B, __C, 5, 8); } __m128 test_mm_fixupimm_ss(__m128 __A, __m128 __B, __m128i __C) { - // CHECK-LABEL: @test_mm_fixupimm_ss + // CHECK-LABEL: test_mm_fixupimm_ss // CHECK: @llvm.x86.avx512.mask.fixupimm return _mm_fixupimm_ss(__A, __B, __C, 5); } __m128 test_mm_mask_fixupimm_ss(__m128 __A, __mmask8 __U, __m128 __B, __m128i __C) { - // CHECK-LABEL: @test_mm_mask_fixupimm_ss + // CHECK-LABEL: test_mm_mask_fixupimm_ss // CHECK: @llvm.x86.avx512.mask.fixupimm return _mm_mask_fixupimm_ss(__A, __U, __B, __C, 5); } __m128 test_mm_maskz_fixupimm_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_fixupimm_round_ss + // CHECK-LABEL: test_mm_maskz_fixupimm_round_ss // CHECK: @llvm.x86.avx512.maskz.fixupimm return _mm_maskz_fixupimm_round_ss(__U, __A, __B, __C, 5, 8); } __m128 test_mm_maskz_fixupimm_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128i __C) { - // CHECK-LABEL: @test_mm_maskz_fixupimm_ss + // CHECK-LABEL: test_mm_maskz_fixupimm_ss // CHECK: @llvm.x86.avx512.maskz.fixupimm return _mm_maskz_fixupimm_ss(__U, __A, __B, __C, 5); } __m128d test_mm_getexp_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_getexp_round_sd + // CHECK-LABEL: test_mm_getexp_round_sd // CHECK: @llvm.x86.avx512.mask.getexp.sd return _mm_getexp_round_sd(__A, __B, 8); } __m128d test_mm_getexp_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_getexp_sd + // CHECK-LABEL: test_mm_getexp_sd // CHECK: 
@llvm.x86.avx512.mask.getexp.sd return _mm_getexp_sd(__A, __B); } __m128 test_mm_getexp_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_getexp_round_ss + // CHECK-LABEL: test_mm_getexp_round_ss // CHECK: @llvm.x86.avx512.mask.getexp.ss return _mm_getexp_round_ss(__A, __B, 8); } __m128 test_mm_getexp_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_getexp_ss + // CHECK-LABEL: test_mm_getexp_ss // CHECK: @llvm.x86.avx512.mask.getexp.ss return _mm_getexp_ss(__A, __B); } __m128d test_mm_getmant_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_getmant_round_sd + // CHECK-LABEL: test_mm_getmant_round_sd // CHECK: @llvm.x86.avx512.mask.getmant.sd return _mm_getmant_round_sd(__A, __B, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, 8); } __m128d test_mm_getmant_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_getmant_sd + // CHECK-LABEL: test_mm_getmant_sd // CHECK: @llvm.x86.avx512.mask.getmant.sd return _mm_getmant_sd(__A, __B, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src); } __m128 test_mm_getmant_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_getmant_round_ss + // CHECK-LABEL: test_mm_getmant_round_ss // CHECK: @llvm.x86.avx512.mask.getmant.ss return _mm_getmant_round_ss(__A, __B, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, 8); } __m128 test_mm_getmant_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_getmant_ss + // CHECK-LABEL: test_mm_getmant_ss // CHECK: @llvm.x86.avx512.mask.getmant.ss return _mm_getmant_ss(__A, __B, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src); } __mmask16 test_mm512_kmov(__mmask16 __A) { - // CHECK-LABEL: @test_mm512_kmov + // CHECK-LABEL: test_mm512_kmov // CHECK: load i16, ptr %__A.addr.i, align 2{{$}} return _mm512_kmov(__A); } __m512d test_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_unpackhi_pd + // CHECK-LABEL: test_mm512_mask_unpackhi_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x 
i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_unpackhi_pd(__W, __U, __A, __B); } #if __x86_64__ long long test_mm_cvt_roundsd_si64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvt_roundsd_si64 + // CHECK-LABEL: test_mm_cvt_roundsd_si64 // CHECK: @llvm.x86.avx512.vcvtsd2si64 return _mm_cvt_roundsd_si64(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } #endif __m512i test_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask2_permutex2var_epi32 + // CHECK-LABEL: test_mm512_mask2_permutex2var_epi32 // CHECK: @llvm.x86.avx512.vpermi2var.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask2_permutex2var_epi32(__A, __I, __U, __B); } __m512i test_mm512_unpackhi_epi32(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_unpackhi_epi32 + // CHECK-LABEL: test_mm512_unpackhi_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> return _mm512_unpackhi_epi32(__A, __B); } __m512d test_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_unpackhi_pd + // CHECK-LABEL: test_mm512_maskz_unpackhi_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_unpackhi_pd(__U, __A, __B); } #if __x86_64__ long long test_mm_cvt_roundsd_i64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvt_roundsd_i64 + // CHECK-LABEL: test_mm_cvt_roundsd_i64 // CHECK: @llvm.x86.avx512.vcvtsd2si64 return _mm_cvt_roundsd_i64(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } #endif __m512d test_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask2_permutex2var_pd + // CHECK-LABEL: test_mm512_mask2_permutex2var_pd // CHECK: @llvm.x86.avx512.vpermi2var.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x 
double> %{{.*}} return _mm512_mask2_permutex2var_pd(__A, __I, __U, __B); } __m512i test_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_unpackhi_epi32 + // CHECK-LABEL: test_mm512_mask_unpackhi_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_unpackhi_epi32(__W, __U, __A, __B); } __m512 test_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_unpackhi_ps + // CHECK-LABEL: test_mm512_mask_unpackhi_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_unpackhi_ps(__W, __U, __A, __B); } __m512 test_mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_unpackhi_ps + // CHECK-LABEL: test_mm512_maskz_unpackhi_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_unpackhi_ps(__U, __A, __B); } __m512d test_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_unpacklo_pd + // CHECK-LABEL: test_mm512_mask_unpacklo_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_unpacklo_pd(__W, __U, __A, __B); } __m512d test_mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_unpacklo_pd + // CHECK-LABEL: test_mm512_maskz_unpacklo_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return 
_mm512_maskz_unpacklo_pd(__U, __A, __B); } __m512 test_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_unpacklo_ps + // CHECK-LABEL: test_mm512_mask_unpacklo_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_unpacklo_ps(__W, __U, __A, __B); } __m512 test_mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_unpacklo_ps + // CHECK-LABEL: test_mm512_maskz_unpacklo_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_unpacklo_ps(__U, __A, __B); } int test_mm_cvt_roundsd_si32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvt_roundsd_si32 + // CHECK-LABEL: test_mm_cvt_roundsd_si32 // CHECK: @llvm.x86.avx512.vcvtsd2si32 return _mm_cvt_roundsd_si32(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } int test_mm_cvt_roundsd_i32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvt_roundsd_i32 + // CHECK-LABEL: test_mm_cvt_roundsd_i32 // CHECK: @llvm.x86.avx512.vcvtsd2si32 return _mm_cvt_roundsd_i32(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } unsigned test_mm_cvt_roundsd_u32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvt_roundsd_u32 + // CHECK-LABEL: test_mm_cvt_roundsd_u32 // CHECK: @llvm.x86.avx512.vcvtsd2usi32 return _mm_cvt_roundsd_u32(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } unsigned test_mm_cvtsd_u32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtsd_u32 + // CHECK-LABEL: test_mm_cvtsd_u32 // CHECK: @llvm.x86.avx512.vcvtsd2usi32 return _mm_cvtsd_u32(__A); } @@ -4732,193 +4734,193 @@ int test_mm512_cvtsi512_si32(__m512i a) { #ifdef __x86_64__ unsigned long long test_mm_cvt_roundsd_u64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvt_roundsd_u64 + // CHECK-LABEL: test_mm_cvt_roundsd_u64 // CHECK: 
@llvm.x86.avx512.vcvtsd2usi64 return _mm_cvt_roundsd_u64(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } unsigned long long test_mm_cvtsd_u64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtsd_u64 + // CHECK-LABEL: test_mm_cvtsd_u64 // CHECK: @llvm.x86.avx512.vcvtsd2usi64 return _mm_cvtsd_u64(__A); } #endif int test_mm_cvt_roundss_si32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvt_roundss_si32 + // CHECK-LABEL: test_mm_cvt_roundss_si32 // CHECK: @llvm.x86.avx512.vcvtss2si32 return _mm_cvt_roundss_si32(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } int test_mm_cvt_roundss_i32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvt_roundss_i32 + // CHECK-LABEL: test_mm_cvt_roundss_i32 // CHECK: @llvm.x86.avx512.vcvtss2si32 return _mm_cvt_roundss_i32(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } #ifdef __x86_64__ long long test_mm_cvt_roundss_si64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvt_roundss_si64 + // CHECK-LABEL: test_mm_cvt_roundss_si64 // CHECK: @llvm.x86.avx512.vcvtss2si64 return _mm_cvt_roundss_si64(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } long long test_mm_cvt_roundss_i64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvt_roundss_i64 + // CHECK-LABEL: test_mm_cvt_roundss_i64 // CHECK: @llvm.x86.avx512.vcvtss2si64 return _mm_cvt_roundss_i64(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } #endif unsigned test_mm_cvt_roundss_u32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvt_roundss_u32 + // CHECK-LABEL: test_mm_cvt_roundss_u32 // CHECK: @llvm.x86.avx512.vcvtss2usi32 return _mm_cvt_roundss_u32(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } unsigned test_mm_cvtss_u32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtss_u32 + // CHECK-LABEL: test_mm_cvtss_u32 // CHECK: @llvm.x86.avx512.vcvtss2usi32 return _mm_cvtss_u32(__A); } #ifdef __x86_64__ unsigned long long test_mm_cvt_roundss_u64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvt_roundss_u64 + // CHECK-LABEL: test_mm_cvt_roundss_u64 // CHECK: @llvm.x86.avx512.vcvtss2usi64 return _mm_cvt_roundss_u64(__A, _MM_FROUND_TO_ZERO | 
_MM_FROUND_NO_EXC); } unsigned long long test_mm_cvtss_u64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtss_u64 + // CHECK-LABEL: test_mm_cvtss_u64 // CHECK: @llvm.x86.avx512.vcvtss2usi64 return _mm_cvtss_u64(__A); } #endif int test_mm_cvtt_roundsd_i32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtt_roundsd_i32 + // CHECK-LABEL: test_mm_cvtt_roundsd_i32 // CHECK: @llvm.x86.avx512.cvttsd2si return _mm_cvtt_roundsd_i32(__A, _MM_FROUND_NO_EXC); } int test_mm_cvtt_roundsd_si32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtt_roundsd_si32 + // CHECK-LABEL: test_mm_cvtt_roundsd_si32 // CHECK: @llvm.x86.avx512.cvttsd2si return _mm_cvtt_roundsd_si32(__A, _MM_FROUND_NO_EXC); } int test_mm_cvttsd_i32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttsd_i32 + // CHECK-LABEL: test_mm_cvttsd_i32 // CHECK: @llvm.x86.avx512.cvttsd2si return _mm_cvttsd_i32(__A); } #ifdef __x86_64__ long long test_mm_cvtt_roundsd_si64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtt_roundsd_si64 + // CHECK-LABEL: test_mm_cvtt_roundsd_si64 // CHECK: @llvm.x86.avx512.cvttsd2si64 return _mm_cvtt_roundsd_si64(__A, _MM_FROUND_NO_EXC); } long long test_mm_cvtt_roundsd_i64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtt_roundsd_i64 + // CHECK-LABEL: test_mm_cvtt_roundsd_i64 // CHECK: @llvm.x86.avx512.cvttsd2si64 return _mm_cvtt_roundsd_i64(__A, _MM_FROUND_NO_EXC); } long long test_mm_cvttsd_i64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttsd_i64 + // CHECK-LABEL: test_mm_cvttsd_i64 // CHECK: @llvm.x86.avx512.cvttsd2si64 return _mm_cvttsd_i64(__A); } #endif unsigned test_mm_cvtt_roundsd_u32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtt_roundsd_u32 + // CHECK-LABEL: test_mm_cvtt_roundsd_u32 // CHECK: @llvm.x86.avx512.cvttsd2usi return _mm_cvtt_roundsd_u32(__A, _MM_FROUND_NO_EXC); } unsigned test_mm_cvttsd_u32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttsd_u32 + // CHECK-LABEL: test_mm_cvttsd_u32 // CHECK: @llvm.x86.avx512.cvttsd2usi return _mm_cvttsd_u32(__A); } #ifdef __x86_64__ unsigned long long 
test_mm_cvtt_roundsd_u64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvtt_roundsd_u64 + // CHECK-LABEL: test_mm_cvtt_roundsd_u64 // CHECK: @llvm.x86.avx512.cvttsd2usi64 return _mm_cvtt_roundsd_u64(__A, _MM_FROUND_NO_EXC); } unsigned long long test_mm_cvttsd_u64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttsd_u64 + // CHECK-LABEL: test_mm_cvttsd_u64 // CHECK: @llvm.x86.avx512.cvttsd2usi64 return _mm_cvttsd_u64(__A); } #endif int test_mm_cvtt_roundss_i32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtt_roundss_i32 + // CHECK-LABEL: test_mm_cvtt_roundss_i32 // CHECK: @llvm.x86.avx512.cvttss2si return _mm_cvtt_roundss_i32(__A, _MM_FROUND_NO_EXC); } int test_mm_cvtt_roundss_si32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtt_roundss_si32 + // CHECK-LABEL: test_mm_cvtt_roundss_si32 // CHECK: @llvm.x86.avx512.cvttss2si return _mm_cvtt_roundss_si32(__A, _MM_FROUND_NO_EXC); } int test_mm_cvttss_i32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttss_i32 + // CHECK-LABEL: test_mm_cvttss_i32 // CHECK: @llvm.x86.avx512.cvttss2si return _mm_cvttss_i32(__A); } #ifdef __x86_64__ float test_mm_cvtt_roundss_i64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtt_roundss_i64 + // CHECK-LABEL: test_mm_cvtt_roundss_i64 // CHECK: @llvm.x86.avx512.cvttss2si64 return _mm_cvtt_roundss_i64(__A, _MM_FROUND_NO_EXC); } long long test_mm_cvtt_roundss_si64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtt_roundss_si64 + // CHECK-LABEL: test_mm_cvtt_roundss_si64 // CHECK: @llvm.x86.avx512.cvttss2si64 return _mm_cvtt_roundss_si64(__A, _MM_FROUND_NO_EXC); } long long test_mm_cvttss_i64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttss_i64 + // CHECK-LABEL: test_mm_cvttss_i64 // CHECK: @llvm.x86.avx512.cvttss2si64 return _mm_cvttss_i64(__A); } #endif unsigned test_mm_cvtt_roundss_u32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtt_roundss_u32 + // CHECK-LABEL: test_mm_cvtt_roundss_u32 // CHECK: @llvm.x86.avx512.cvttss2usi return _mm_cvtt_roundss_u32(__A, _MM_FROUND_NO_EXC); } unsigned test_mm_cvttss_u32(__m128 __A) { - 
// CHECK-LABEL: @test_mm_cvttss_u32 + // CHECK-LABEL: test_mm_cvttss_u32 // CHECK: @llvm.x86.avx512.cvttss2usi return _mm_cvttss_u32(__A); } #ifdef __x86_64__ unsigned long long test_mm_cvtt_roundss_u64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvtt_roundss_u64 + // CHECK-LABEL: test_mm_cvtt_roundss_u64 // CHECK: @llvm.x86.avx512.cvttss2usi64 return _mm_cvtt_roundss_u64(__A, _MM_FROUND_NO_EXC); } unsigned long long test_mm_cvttss_u64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttss_u64 + // CHECK-LABEL: test_mm_cvttss_u64 // CHECK: @llvm.x86.avx512.cvttss2usi64 return _mm_cvttss_u64(__A); } @@ -4926,21 +4928,21 @@ unsigned long long test_mm_cvttss_u64(__m128 __A) { __m512i test_mm512_cvtt_roundps_epu32(__m512 __A) { - // CHECK-LABEL: @test_mm512_cvtt_roundps_epu32 + // CHECK-LABEL: test_mm512_cvtt_roundps_epu32 // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 return _mm512_cvtt_roundps_epu32(__A, _MM_FROUND_NO_EXC); } __m512i test_mm512_mask_cvtt_roundps_epu32(__m512i __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epu32 + // CHECK-LABEL: test_mm512_mask_cvtt_roundps_epu32 // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 return _mm512_mask_cvtt_roundps_epu32(__W, __U, __A, _MM_FROUND_NO_EXC); } __m512i test_mm512_maskz_cvtt_roundps_epu32( __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epu32 + // CHECK-LABEL: test_mm512_maskz_cvtt_roundps_epu32 // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 return _mm512_maskz_cvtt_roundps_epu32(__U, __A, _MM_FROUND_NO_EXC); @@ -4948,56 +4950,56 @@ __m512i test_mm512_maskz_cvtt_roundps_epu32( __mmask16 __U, __m512 __A) __m256i test_mm512_cvt_roundps_ph(__m512 __A) { - // CHECK-LABEL: @test_mm512_cvt_roundps_ph + // CHECK-LABEL: test_mm512_cvt_roundps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 return _mm512_cvt_roundps_ph(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m256i test_mm512_mask_cvt_roundps_ph(__m256i __W , __mmask16 __U, __m512 __A) { - // CHECK-LABEL: 
@test_mm512_mask_cvt_roundps_ph + // CHECK-LABEL: test_mm512_mask_cvt_roundps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 return _mm512_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m256i test_mm512_maskz_cvt_roundps_ph(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_ph + // CHECK-LABEL: test_mm512_maskz_cvt_roundps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 return _mm512_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_cvt_roundph_ps(__m256i __A) { - // CHECK-LABEL: @test_mm512_cvt_roundph_ps + // CHECK-LABEL: test_mm512_cvt_roundph_ps // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512( return _mm512_cvt_roundph_ps(__A, _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_cvt_roundph_ps(__m512 __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundph_ps + // CHECK-LABEL: test_mm512_mask_cvt_roundph_ps // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512( return _mm512_mask_cvt_roundph_ps(__W, __U, __A, _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundph_ps + // CHECK-LABEL: test_mm512_maskz_cvt_roundph_ps // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512( return _mm512_maskz_cvt_roundph_ps(__U, __A, _MM_FROUND_NO_EXC); } __m512 test_mm512_cvt_roundepi32_ps( __m512i __A) { - // CHECK-LABEL: @test_mm512_cvt_roundepi32_ps + // CHECK-LABEL: test_mm512_cvt_roundepi32_ps // CHECK: @llvm.x86.avx512.sitofp.round.v16f32.v16i32 return _mm512_cvt_roundepi32_ps(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_cvt_roundepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundepi32_ps + // CHECK-LABEL: test_mm512_mask_cvt_roundepi32_ps // CHECK: @llvm.x86.avx512.sitofp.round.v16f32.v16i32 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_cvt_roundepi32_ps(__W,__U,__A, 
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); @@ -5005,7 +5007,7 @@ __m512 test_mm512_mask_cvt_roundepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) __m512 test_mm512_maskz_cvt_roundepi32_ps(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundepi32_ps + // CHECK-LABEL: test_mm512_maskz_cvt_roundepi32_ps // CHECK: @llvm.x86.avx512.sitofp.round.v16f32.v16i32 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_cvt_roundepi32_ps(__U,__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); @@ -5013,14 +5015,14 @@ __m512 test_mm512_maskz_cvt_roundepi32_ps(__mmask16 __U, __m512i __A) __m512 test_mm512_cvt_roundepu32_ps(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvt_roundepu32_ps + // CHECK-LABEL: test_mm512_cvt_roundepu32_ps // CHECK: @llvm.x86.avx512.uitofp.round.v16f32.v16i32 return _mm512_cvt_roundepu32_ps(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_cvt_roundepu32_ps(__m512 __W, __mmask16 __U,__m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundepu32_ps + // CHECK-LABEL: test_mm512_mask_cvt_roundepu32_ps // CHECK: @llvm.x86.avx512.uitofp.round.v16f32.v16i32 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_cvt_roundepu32_ps(__W,__U,__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); @@ -5028,7 +5030,7 @@ __m512 test_mm512_mask_cvt_roundepu32_ps(__m512 __W, __mmask16 __U,__m512i __A) __m512 test_mm512_maskz_cvt_roundepu32_ps(__mmask16 __U,__m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundepu32_ps + // CHECK-LABEL: test_mm512_maskz_cvt_roundepu32_ps // CHECK: @llvm.x86.avx512.uitofp.round.v16f32.v16i32 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_cvt_roundepu32_ps(__U,__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); @@ -5036,253 +5038,253 @@ __m512 test_mm512_maskz_cvt_roundepu32_ps(__mmask16 __U,__m512i __A) __m256 test_mm512_cvt_roundpd_ps(__m512d A) { - // CHECK-LABEL: 
@test_mm512_cvt_roundpd_ps + // CHECK-LABEL: test_mm512_cvt_roundpd_ps // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 return _mm512_cvt_roundpd_ps(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m256 test_mm512_mask_cvt_roundpd_ps(__m256 W, __mmask8 U,__m512d A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_ps + // CHECK-LABEL: test_mm512_mask_cvt_roundpd_ps // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 return _mm512_mask_cvt_roundpd_ps(W,U,A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m256 test_mm512_maskz_cvt_roundpd_ps(__mmask8 U, __m512d A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_ps + // CHECK-LABEL: test_mm512_maskz_cvt_roundpd_ps // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 return _mm512_maskz_cvt_roundpd_ps(U,A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m256i test_mm512_cvtt_roundpd_epi32(__m512d A) { - // CHECK-LABEL: @test_mm512_cvtt_roundpd_epi32 + // CHECK-LABEL: test_mm512_cvtt_roundpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 return _mm512_cvtt_roundpd_epi32(A,_MM_FROUND_NO_EXC); } __m256i test_mm512_mask_cvtt_roundpd_epi32(__m256i W, __mmask8 U, __m512d A) { - // CHECK-LABEL: @test_mm512_mask_cvtt_roundpd_epi32 + // CHECK-LABEL: test_mm512_mask_cvtt_roundpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 return _mm512_mask_cvtt_roundpd_epi32(W,U,A,_MM_FROUND_NO_EXC); } __m256i test_mm512_maskz_cvtt_roundpd_epi32(__mmask8 U, __m512d A) { - // CHECK-LABEL: @test_mm512_maskz_cvtt_roundpd_epi32 + // CHECK-LABEL: test_mm512_maskz_cvtt_roundpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 return _mm512_maskz_cvtt_roundpd_epi32(U,A,_MM_FROUND_NO_EXC); } __m512i test_mm512_cvtt_roundps_epi32(__m512 A) { - // CHECK-LABEL: @test_mm512_cvtt_roundps_epi32 + // CHECK-LABEL: test_mm512_cvtt_roundps_epi32 // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 return _mm512_cvtt_roundps_epi32(A,_MM_FROUND_NO_EXC); } __m512i test_mm512_mask_cvtt_roundps_epi32(__m512i W,__mmask16 U, __m512 A) { - // CHECK-LABEL: 
@test_mm512_mask_cvtt_roundps_epi32 + // CHECK-LABEL: test_mm512_mask_cvtt_roundps_epi32 // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 return _mm512_mask_cvtt_roundps_epi32(W,U,A,_MM_FROUND_NO_EXC); } __m512i test_mm512_maskz_cvtt_roundps_epi32(__mmask16 U, __m512 A) { - // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epi32 + // CHECK-LABEL: test_mm512_maskz_cvtt_roundps_epi32 // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 return _mm512_maskz_cvtt_roundps_epi32(U,A,_MM_FROUND_NO_EXC); } __m512i test_mm512_cvt_roundps_epi32(__m512 __A) { - // CHECK-LABEL: @test_mm512_cvt_roundps_epi32 + // CHECK-LABEL: test_mm512_cvt_roundps_epi32 // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 return _mm512_cvt_roundps_epi32(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512i test_mm512_mask_cvt_roundps_epi32(__m512i __W,__mmask16 __U,__m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundps_epi32 + // CHECK-LABEL: test_mm512_mask_cvt_roundps_epi32 // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 return _mm512_mask_cvt_roundps_epi32(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512i test_mm512_maskz_cvt_roundps_epi32(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epi32 + // CHECK-LABEL: test_mm512_maskz_cvt_roundps_epi32 // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 return _mm512_maskz_cvt_roundps_epi32(__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m256i test_mm512_cvt_roundpd_epi32(__m512d A) { - // CHECK-LABEL: @test_mm512_cvt_roundpd_epi32 + // CHECK-LABEL: test_mm512_cvt_roundpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 return _mm512_cvt_roundpd_epi32(A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m256i test_mm512_mask_cvt_roundpd_epi32(__m256i W,__mmask8 U,__m512d A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epi32 + // CHECK-LABEL: test_mm512_mask_cvt_roundpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 return _mm512_mask_cvt_roundpd_epi32(W,U,A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m256i 
test_mm512_maskz_cvt_roundpd_epi32(__mmask8 U, __m512d A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epi32 + // CHECK-LABEL: test_mm512_maskz_cvt_roundpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 return _mm512_maskz_cvt_roundpd_epi32(U,A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512i test_mm512_cvt_roundps_epu32(__m512 __A) { - // CHECK-LABEL: @test_mm512_cvt_roundps_epu32 + // CHECK-LABEL: test_mm512_cvt_roundps_epu32 // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 return _mm512_cvt_roundps_epu32(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512i test_mm512_mask_cvt_roundps_epu32(__m512i __W,__mmask16 __U,__m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundps_epu32 + // CHECK-LABEL: test_mm512_mask_cvt_roundps_epu32 // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 return _mm512_mask_cvt_roundps_epu32(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512i test_mm512_maskz_cvt_roundps_epu32(__mmask16 __U,__m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epu32 + // CHECK-LABEL: test_mm512_maskz_cvt_roundps_epu32 // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 return _mm512_maskz_cvt_roundps_epu32(__U,__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m256i test_mm512_cvt_roundpd_epu32(__m512d A) { - // CHECK-LABEL: @test_mm512_cvt_roundpd_epu32 + // CHECK-LABEL: test_mm512_cvt_roundpd_epu32 // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 return _mm512_cvt_roundpd_epu32(A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m256i test_mm512_mask_cvt_roundpd_epu32(__m256i W, __mmask8 U, __m512d A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epu32 + // CHECK-LABEL: test_mm512_mask_cvt_roundpd_epu32 // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 return _mm512_mask_cvt_roundpd_epu32(W,U,A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m256i test_mm512_maskz_cvt_roundpd_epu32(__mmask8 U, __m512d A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epu32 + // CHECK-LABEL: test_mm512_maskz_cvt_roundpd_epu32 // CHECK: 
@llvm.x86.avx512.mask.cvtpd2udq.512 return _mm512_maskz_cvt_roundpd_epu32(U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask2_permutex2var_ps + // CHECK-LABEL: test_mm512_mask2_permutex2var_ps // CHECK: @llvm.x86.avx512.vpermi2var.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask2_permutex2var_ps(__A, __I, __U, __B); } __m512i test_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask2_permutex2var_epi64 + // CHECK-LABEL: test_mm512_mask2_permutex2var_epi64 // CHECK: @llvm.x86.avx512.vpermi2var.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask2_permutex2var_epi64(__A, __I, __U, __B); } __m512d test_mm512_permute_pd(__m512d __X) { - // CHECK-LABEL: @test_mm512_permute_pd + // CHECK-LABEL: test_mm512_permute_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> return _mm512_permute_pd(__X, 2); } __m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) { - // CHECK-LABEL: @test_mm512_mask_permute_pd + // CHECK-LABEL: test_mm512_mask_permute_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_permute_pd(__W, __U, __X, 2); } __m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) { - // CHECK-LABEL: @test_mm512_maskz_permute_pd + // CHECK-LABEL: test_mm512_maskz_permute_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_permute_pd(__U, __X, 2); } __m512 test_mm512_permute_ps(__m512 __X) { - // CHECK-LABEL: @test_mm512_permute_ps + // CHECK-LABEL: test_mm512_permute_ps // CHECK: 
shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> return _mm512_permute_ps(__X, 2); } __m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) { - // CHECK-LABEL: @test_mm512_mask_permute_ps + // CHECK-LABEL: test_mm512_mask_permute_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_permute_ps(__W, __U, __X, 2); } __m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) { - // CHECK-LABEL: @test_mm512_maskz_permute_ps + // CHECK-LABEL: test_mm512_maskz_permute_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_permute_ps(__U, __X, 2); } __m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) { - // CHECK-LABEL: @test_mm512_permutevar_pd + // CHECK-LABEL: test_mm512_permutevar_pd // CHECK: @llvm.x86.avx512.vpermilvar.pd.512 return _mm512_permutevar_pd(__A, __C); } __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_permutevar_pd + // CHECK-LABEL: test_mm512_mask_permutevar_pd // CHECK: @llvm.x86.avx512.vpermilvar.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_permutevar_pd(__W, __U, __A, __C); } __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_permutevar_pd + // CHECK-LABEL: test_mm512_maskz_permutevar_pd // CHECK: @llvm.x86.avx512.vpermilvar.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_permutevar_pd(__U, __A, __C); } __m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) { - // CHECK-LABEL: @test_mm512_permutevar_ps + // CHECK-LABEL: test_mm512_permutevar_ps // CHECK: @llvm.x86.avx512.vpermilvar.ps.512 return 
_mm512_permutevar_ps(__A, __C); } __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_permutevar_ps + // CHECK-LABEL: test_mm512_mask_permutevar_ps // CHECK: @llvm.x86.avx512.vpermilvar.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_permutevar_ps(__W, __U, __A, __C); } __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_permutevar_ps + // CHECK-LABEL: test_mm512_maskz_permutevar_ps // CHECK: @llvm.x86.avx512.vpermilvar.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_permutevar_ps(__U, __A, __C); } __m512i test_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) { - // CHECK-LABEL: @test_mm512_permutex2var_epi32 + // CHECK-LABEL: test_mm512_permutex2var_epi32 // CHECK: @llvm.x86.avx512.vpermi2var.d.512 return _mm512_permutex2var_epi32(__A, __I, __B); } __m512i test_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_permutex2var_epi32 + // CHECK-LABEL: test_mm512_maskz_permutex2var_epi32 // CHECK: @llvm.x86.avx512.vpermi2var.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_permutex2var_epi32(__U, __A, __I, __B); @@ -5290,7 +5292,7 @@ __m512i test_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __m512i test_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U, __m512i __I, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_permutex2var_epi32 + // CHECK-LABEL: test_mm512_mask_permutex2var_epi32 // CHECK: @llvm.x86.avx512.vpermi2var.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_permutex2var_epi32 (__A,__U,__I,__B); @@ -5298,21 +5300,21 @@ __m512i test_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U, 
__m512i __m512d test_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B) { - // CHECK-LABEL: @test_mm512_permutex2var_pd + // CHECK-LABEL: test_mm512_permutex2var_pd // CHECK: @llvm.x86.avx512.vpermi2var.pd.512 return _mm512_permutex2var_pd (__A, __I,__B); } __m512d test_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_permutex2var_pd + // CHECK-LABEL: test_mm512_mask_permutex2var_pd // CHECK: @llvm.x86.avx512.vpermi2var.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_permutex2var_pd (__A,__U,__I,__B); } __m512d test_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_permutex2var_pd + // CHECK-LABEL: test_mm512_maskz_permutex2var_pd // CHECK: @llvm.x86.avx512.vpermi2var.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_permutex2var_pd(__U, __A, __I, __B); @@ -5320,54 +5322,54 @@ __m512d test_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512 test_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B) { - // CHECK-LABEL: @test_mm512_permutex2var_ps + // CHECK-LABEL: test_mm512_permutex2var_ps // CHECK: @llvm.x86.avx512.vpermi2var.ps.512 return _mm512_permutex2var_ps (__A, __I, __B); } __m512 test_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_permutex2var_ps + // CHECK-LABEL: test_mm512_mask_permutex2var_ps // CHECK: @llvm.x86.avx512.vpermi2var.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_permutex2var_ps (__A,__U,__I,__B); } __m512 test_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_permutex2var_ps + // CHECK-LABEL: test_mm512_maskz_permutex2var_ps // CHECK: 
@llvm.x86.avx512.vpermi2var.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_permutex2var_ps(__U, __A, __I, __B); } __m512i test_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B){ - // CHECK-LABEL: @test_mm512_permutex2var_epi64 + // CHECK-LABEL: test_mm512_permutex2var_epi64 // CHECK: @llvm.x86.avx512.vpermi2var.q.512 return _mm512_permutex2var_epi64(__A, __I, __B); } __m512i test_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I, __m512i __B){ - // CHECK-LABEL: @test_mm512_mask_permutex2var_epi64 + // CHECK-LABEL: test_mm512_mask_permutex2var_epi64 // CHECK: @llvm.x86.avx512.vpermi2var.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_permutex2var_epi64(__A, __U, __I, __B); } __m512i test_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_permutex2var_epi64 + // CHECK-LABEL: test_mm512_maskz_permutex2var_epi64 // CHECK: @llvm.x86.avx512.vpermi2var.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_permutex2var_epi64(__U, __A, __I, __B); } __mmask16 test_mm512_testn_epi32_mask(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_testn_epi32_mask + // CHECK-LABEL: test_mm512_testn_epi32_mask // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} return _mm512_testn_epi32_mask(__A, __B); } __mmask16 test_mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_testn_epi32_mask + // CHECK-LABEL: test_mm512_mask_testn_epi32_mask // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} // CHECK: and <16 x i1> %{{.*}}, %{{.*}} @@ -5375,14 +5377,14 @@ __mmask16 test_mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i _ } __mmask8 test_mm512_testn_epi64_mask(__m512i __A, __m512i __B) { - // 
CHECK-LABEL: @test_mm512_testn_epi64_mask + // CHECK-LABEL: test_mm512_testn_epi64_mask // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} return _mm512_testn_epi64_mask(__A, __B); } __mmask8 test_mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_testn_epi64_mask + // CHECK-LABEL: test_mm512_mask_testn_epi64_mask // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} @@ -5391,7 +5393,7 @@ __mmask8 test_mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B __mmask16 test_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_test_epi32_mask + // CHECK-LABEL: test_mm512_mask_test_epi32_mask // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: icmp ne <16 x i32> %{{.*}}, %{{.*}} return _mm512_mask_test_epi32_mask (__U,__A,__B); @@ -5399,7 +5401,7 @@ __mmask16 test_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i _ __mmask8 test_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_test_epi64_mask + // CHECK-LABEL: test_mm512_mask_test_epi64_mask // CHECK: and <16 x i32> %{{.*}}, %{{.*}} // CHECK: icmp ne <8 x i64> %{{.*}}, %{{.*}} // CHECK: and <8 x i1> %{{.*}}, %{{.*}} @@ -5407,80 +5409,80 @@ __mmask8 test_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B } __m512i test_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_unpackhi_epi32 + // CHECK-LABEL: test_mm512_maskz_unpackhi_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_unpackhi_epi32(__U, __A, __B); } __m512i test_mm512_unpackhi_epi64(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_unpackhi_epi64 + // CHECK-LABEL: 
test_mm512_unpackhi_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> return _mm512_unpackhi_epi64(__A, __B); } __m512i test_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_unpackhi_epi64 + // CHECK-LABEL: test_mm512_mask_unpackhi_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_unpackhi_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_unpackhi_epi64 + // CHECK-LABEL: test_mm512_maskz_unpackhi_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_unpackhi_epi64(__U, __A, __B); } __m512i test_mm512_unpacklo_epi32(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_unpacklo_epi32 + // CHECK-LABEL: test_mm512_unpacklo_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> return _mm512_unpacklo_epi32(__A, __B); } __m512i test_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_unpacklo_epi32 + // CHECK-LABEL: test_mm512_mask_unpacklo_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_unpacklo_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_unpacklo_epi32 + // CHECK-LABEL: test_mm512_maskz_unpacklo_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_unpacklo_epi32(__U, __A, __B); } __m512i 
test_mm512_unpacklo_epi64(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_unpacklo_epi64 + // CHECK-LABEL: test_mm512_unpacklo_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> return _mm512_unpacklo_epi64(__A, __B); } __m512i test_mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_unpacklo_epi64 + // CHECK-LABEL: test_mm512_mask_unpacklo_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_unpacklo_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_unpacklo_epi64 + // CHECK-LABEL: test_mm512_maskz_unpacklo_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_unpacklo_epi64(__U, __A, __B); } __m128d test_mm_roundscale_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_roundscale_round_sd + // CHECK-LABEL: test_mm_roundscale_round_sd // CHECK: @llvm.x86.avx512.mask.rndscale.sd return _mm_roundscale_round_sd(__A, __B, 3, _MM_FROUND_NO_EXC); } __m128d test_mm_roundscale_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_roundscale_sd + // CHECK-LABEL: test_mm_roundscale_sd // CHECK: @llvm.x86.avx512.mask.rndscale.sd return _mm_roundscale_sd(__A, __B, 3); } @@ -5506,667 +5508,667 @@ __m128d test_mm_maskz_roundscale_round_sd(__mmask8 __U, __m128d __A, __m128d __B } __m128 test_mm_roundscale_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_roundscale_round_ss + // CHECK-LABEL: test_mm_roundscale_round_ss // CHECK: @llvm.x86.avx512.mask.rndscale.ss return _mm_roundscale_round_ss(__A, __B, 3, _MM_FROUND_NO_EXC); } __m128 test_mm_roundscale_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_roundscale_ss + // 
CHECK-LABEL: test_mm_roundscale_ss // CHECK: @llvm.x86.avx512.mask.rndscale.ss return _mm_roundscale_ss(__A, __B, 3); } __m128 test_mm_mask_roundscale_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_roundscale_ss + // CHECK-LABEL: test_mm_mask_roundscale_ss // CHECK: @llvm.x86.avx512.mask.rndscale.ss return _mm_mask_roundscale_ss(__W,__U,__A,__B,3); } __m128 test_mm_maskz_roundscale_round_ss( __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_roundscale_round_ss + // CHECK-LABEL: test_mm_maskz_roundscale_round_ss // CHECK: @llvm.x86.avx512.mask.rndscale.ss return _mm_maskz_roundscale_round_ss(__U,__A,__B,3,_MM_FROUND_NO_EXC); } __m128 test_mm_maskz_roundscale_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_roundscale_ss + // CHECK-LABEL: test_mm_maskz_roundscale_ss // CHECK: @llvm.x86.avx512.mask.rndscale.ss return _mm_maskz_roundscale_ss(__U,__A,__B,3); } __m512d test_mm512_scalef_round_pd(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_scalef_round_pd + // CHECK-LABEL: test_mm512_scalef_round_pd // CHECK: @llvm.x86.avx512.mask.scalef.pd.512 return _mm512_scalef_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_scalef_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_scalef_round_pd + // CHECK-LABEL: test_mm512_mask_scalef_round_pd // CHECK: @llvm.x86.avx512.mask.scalef.pd.512 return _mm512_mask_scalef_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_scalef_round_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_scalef_round_pd + // CHECK-LABEL: test_mm512_maskz_scalef_round_pd // CHECK: @llvm.x86.avx512.mask.scalef.pd.512 return _mm512_maskz_scalef_round_pd(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512d test_mm512_scalef_pd(__m512d __A, __m512d __B) { - // CHECK-LABEL: 
@test_mm512_scalef_pd + // CHECK-LABEL: test_mm512_scalef_pd // CHECK: @llvm.x86.avx512.mask.scalef.pd.512 return _mm512_scalef_pd(__A, __B); } __m512d test_mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_scalef_pd + // CHECK-LABEL: test_mm512_mask_scalef_pd // CHECK: @llvm.x86.avx512.mask.scalef.pd.512 return _mm512_mask_scalef_pd(__W, __U, __A, __B); } __m512d test_mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_scalef_pd + // CHECK-LABEL: test_mm512_maskz_scalef_pd // CHECK: @llvm.x86.avx512.mask.scalef.pd.512 return _mm512_maskz_scalef_pd(__U, __A, __B); } __m512 test_mm512_scalef_round_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_scalef_round_ps + // CHECK-LABEL: test_mm512_scalef_round_ps // CHECK: @llvm.x86.avx512.mask.scalef.ps.512 return _mm512_scalef_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_scalef_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_scalef_round_ps + // CHECK-LABEL: test_mm512_mask_scalef_round_ps // CHECK: @llvm.x86.avx512.mask.scalef.ps.512 return _mm512_mask_scalef_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_scalef_round_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_scalef_round_ps + // CHECK-LABEL: test_mm512_maskz_scalef_round_ps // CHECK: @llvm.x86.avx512.mask.scalef.ps.512 return _mm512_maskz_scalef_round_ps(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 test_mm512_scalef_ps(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_scalef_ps + // CHECK-LABEL: test_mm512_scalef_ps // CHECK: @llvm.x86.avx512.mask.scalef.ps.512 return _mm512_scalef_ps(__A, __B); } __m512 test_mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_scalef_ps + // CHECK-LABEL: 
test_mm512_mask_scalef_ps // CHECK: @llvm.x86.avx512.mask.scalef.ps.512 return _mm512_mask_scalef_ps(__W, __U, __A, __B); } __m512 test_mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_scalef_ps + // CHECK-LABEL: test_mm512_maskz_scalef_ps // CHECK: @llvm.x86.avx512.mask.scalef.ps.512 return _mm512_maskz_scalef_ps(__U, __A, __B); } __m128d test_mm_scalef_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_scalef_round_sd + // CHECK-LABEL: test_mm_scalef_round_sd // CHECK: @llvm.x86.avx512.mask.scalef.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %2, i8 -1, i32 11) return _mm_scalef_round_sd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_scalef_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_scalef_sd + // CHECK-LABEL: test_mm_scalef_sd // CHECK: @llvm.x86.avx512.mask.scalef return _mm_scalef_sd(__A, __B); } __m128d test_mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_scalef_sd + // CHECK-LABEL: test_mm_mask_scalef_sd // CHECK: @llvm.x86.avx512.mask.scalef.sd return _mm_mask_scalef_sd(__W, __U, __A, __B); } __m128d test_mm_mask_scalef_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_scalef_round_sd + // CHECK-LABEL: test_mm_mask_scalef_round_sd // CHECK: @llvm.x86.avx512.mask.scalef.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 11) return _mm_mask_scalef_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_scalef_sd + // CHECK-LABEL: test_mm_maskz_scalef_sd // CHECK: @llvm.x86.avx512.mask.scalef.sd return _mm_maskz_scalef_sd(__U, __A, __B); } __m128d test_mm_maskz_scalef_round_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_scalef_round_sd + // CHECK-LABEL: 
test_mm_maskz_scalef_round_sd // CHECK: @llvm.x86.avx512.mask.scalef.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 11) return _mm_maskz_scalef_round_sd(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_scalef_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_scalef_round_ss + // CHECK-LABEL: test_mm_scalef_round_ss // CHECK: @llvm.x86.avx512.mask.scalef.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 11) return _mm_scalef_round_ss(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_scalef_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_scalef_ss + // CHECK-LABEL: test_mm_scalef_ss // CHECK: @llvm.x86.avx512.mask.scalef.ss return _mm_scalef_ss(__A, __B); } __m128 test_mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_scalef_ss + // CHECK-LABEL: test_mm_mask_scalef_ss // CHECK: @llvm.x86.avx512.mask.scalef.ss return _mm_mask_scalef_ss(__W, __U, __A, __B); } __m128 test_mm_mask_scalef_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_scalef_round_ss + // CHECK-LABEL: test_mm_mask_scalef_round_ss // CHECK: @llvm.x86.avx512.mask.scalef.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 11) return _mm_mask_scalef_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_scalef_ss + // CHECK-LABEL: test_mm_maskz_scalef_ss // CHECK: @llvm.x86.avx512.mask.scalef.ss return _mm_maskz_scalef_ss(__U, __A, __B); } __m128 test_mm_maskz_scalef_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_scalef_round_ss + // CHECK-LABEL: test_mm_maskz_scalef_round_ss // CHECK: @llvm.x86.avx512.mask.scalef.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 11) return 
_mm_maskz_scalef_round_ss(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512i test_mm512_srai_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_srai_epi32 + // CHECK-LABEL: test_mm512_srai_epi32 // CHECK: @llvm.x86.avx512.psrai.d.512 return _mm512_srai_epi32(__A, 5); } __m512i test_mm512_srai_epi32_2(__m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_srai_epi32_2 + // CHECK-LABEL: test_mm512_srai_epi32_2 // CHECK: @llvm.x86.avx512.psrai.d.512 return _mm512_srai_epi32(__A, __B); } __m512i test_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_srai_epi32 + // CHECK-LABEL: test_mm512_mask_srai_epi32 // CHECK: @llvm.x86.avx512.psrai.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srai_epi32(__W, __U, __A, 5); } __m512i test_mm512_mask_srai_epi32_2(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_mask_srai_epi32_2 + // CHECK-LABEL: test_mm512_mask_srai_epi32_2 // CHECK: @llvm.x86.avx512.psrai.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srai_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_srai_epi32 + // CHECK-LABEL: test_mm512_maskz_srai_epi32 // CHECK: @llvm.x86.avx512.psrai.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srai_epi32(__U, __A, 5); } __m512i test_mm512_maskz_srai_epi32_2(__mmask16 __U, __m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_maskz_srai_epi32_2 + // CHECK-LABEL: test_mm512_maskz_srai_epi32_2 // CHECK: @llvm.x86.avx512.psrai.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srai_epi32(__U, __A, __B); } __m512i test_mm512_srai_epi64(__m512i __A) { - // CHECK-LABEL: @test_mm512_srai_epi64 + // CHECK-LABEL: test_mm512_srai_epi64 
// CHECK: @llvm.x86.avx512.psrai.q.512 return _mm512_srai_epi64(__A, 5); } __m512i test_mm512_srai_epi64_2(__m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_srai_epi64_2 + // CHECK-LABEL: test_mm512_srai_epi64_2 // CHECK: @llvm.x86.avx512.psrai.q.512 return _mm512_srai_epi64(__A, __B); } __m512i test_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_srai_epi64 + // CHECK-LABEL: test_mm512_mask_srai_epi64 // CHECK: @llvm.x86.avx512.psrai.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srai_epi64(__W, __U, __A, 5); } __m512i test_mm512_mask_srai_epi64_2(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_mask_srai_epi64_2 + // CHECK-LABEL: test_mm512_mask_srai_epi64_2 // CHECK: @llvm.x86.avx512.psrai.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srai_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_srai_epi64 + // CHECK-LABEL: test_mm512_maskz_srai_epi64 // CHECK: @llvm.x86.avx512.psrai.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srai_epi64(__U, __A, 5); } __m512i test_mm512_maskz_srai_epi64_2(__mmask8 __U, __m512i __A, unsigned int __B) { - // CHECK-LABEL: @test_mm512_maskz_srai_epi64_2 + // CHECK-LABEL: test_mm512_maskz_srai_epi64_2 // CHECK: @llvm.x86.avx512.psrai.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srai_epi64(__U, __A, __B); } __m512i test_mm512_sll_epi32(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_sll_epi32 + // CHECK-LABEL: test_mm512_sll_epi32 // CHECK: @llvm.x86.avx512.psll.d.512 return _mm512_sll_epi32(__A, __B); } __m512i test_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_sll_epi32 + // 
CHECK-LABEL: test_mm512_mask_sll_epi32 // CHECK: @llvm.x86.avx512.psll.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_sll_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_sll_epi32 + // CHECK-LABEL: test_mm512_maskz_sll_epi32 // CHECK: @llvm.x86.avx512.psll.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_sll_epi32(__U, __A, __B); } __m512i test_mm512_sll_epi64(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_sll_epi64 + // CHECK-LABEL: test_mm512_sll_epi64 // CHECK: @llvm.x86.avx512.psll.q.512 return _mm512_sll_epi64(__A, __B); } __m512i test_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_sll_epi64 + // CHECK-LABEL: test_mm512_mask_sll_epi64 // CHECK: @llvm.x86.avx512.psll.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_sll_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_sll_epi64 + // CHECK-LABEL: test_mm512_maskz_sll_epi64 // CHECK: @llvm.x86.avx512.psll.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_sll_epi64(__U, __A, __B); } __m512i test_mm512_sllv_epi32(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_sllv_epi32 + // CHECK-LABEL: test_mm512_sllv_epi32 // CHECK: @llvm.x86.avx512.psllv.d.512 return _mm512_sllv_epi32(__X, __Y); } __m512i test_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_sllv_epi32 + // CHECK-LABEL: test_mm512_mask_sllv_epi32 // CHECK: @llvm.x86.avx512.psllv.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_sllv_epi32(__W, __U, __X, __Y); } __m512i 
test_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_sllv_epi32 + // CHECK-LABEL: test_mm512_maskz_sllv_epi32 // CHECK: @llvm.x86.avx512.psllv.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_sllv_epi32(__U, __X, __Y); } __m512i test_mm512_sllv_epi64(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_sllv_epi64 + // CHECK-LABEL: test_mm512_sllv_epi64 // CHECK: @llvm.x86.avx512.psllv.q.512 return _mm512_sllv_epi64(__X, __Y); } __m512i test_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_sllv_epi64 + // CHECK-LABEL: test_mm512_mask_sllv_epi64 // CHECK: @llvm.x86.avx512.psllv.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_sllv_epi64(__W, __U, __X, __Y); } __m512i test_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_sllv_epi64 + // CHECK-LABEL: test_mm512_maskz_sllv_epi64 // CHECK: @llvm.x86.avx512.psllv.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_sllv_epi64(__U, __X, __Y); } __m512i test_mm512_sra_epi32(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_sra_epi32 + // CHECK-LABEL: test_mm512_sra_epi32 // CHECK: @llvm.x86.avx512.psra.d.512 return _mm512_sra_epi32(__A, __B); } __m512i test_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_sra_epi32 + // CHECK-LABEL: test_mm512_mask_sra_epi32 // CHECK: @llvm.x86.avx512.psra.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_sra_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_sra_epi32 + // CHECK-LABEL: test_mm512_maskz_sra_epi32 // CHECK: @llvm.x86.avx512.psra.d.512 // CHECK: select 
<16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_sra_epi32(__U, __A, __B); } __m512i test_mm512_sra_epi64(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_sra_epi64 + // CHECK-LABEL: test_mm512_sra_epi64 // CHECK: @llvm.x86.avx512.psra.q.512 return _mm512_sra_epi64(__A, __B); } __m512i test_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_sra_epi64 + // CHECK-LABEL: test_mm512_mask_sra_epi64 // CHECK: @llvm.x86.avx512.psra.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_sra_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_sra_epi64 + // CHECK-LABEL: test_mm512_maskz_sra_epi64 // CHECK: @llvm.x86.avx512.psra.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_sra_epi64(__U, __A, __B); } __m512i test_mm512_srav_epi32(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_srav_epi32 + // CHECK-LABEL: test_mm512_srav_epi32 // CHECK: @llvm.x86.avx512.psrav.d.512 return _mm512_srav_epi32(__X, __Y); } __m512i test_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_srav_epi32 + // CHECK-LABEL: test_mm512_mask_srav_epi32 // CHECK: @llvm.x86.avx512.psrav.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srav_epi32(__W, __U, __X, __Y); } __m512i test_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_srav_epi32 + // CHECK-LABEL: test_mm512_maskz_srav_epi32 // CHECK: @llvm.x86.avx512.psrav.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srav_epi32(__U, __X, __Y); } __m512i test_mm512_srav_epi64(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_srav_epi64 + // CHECK-LABEL: 
test_mm512_srav_epi64 // CHECK: @llvm.x86.avx512.psrav.q.512 return _mm512_srav_epi64(__X, __Y); } __m512i test_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_srav_epi64 + // CHECK-LABEL: test_mm512_mask_srav_epi64 // CHECK: @llvm.x86.avx512.psrav.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srav_epi64(__W, __U, __X, __Y); } __m512i test_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_srav_epi64 + // CHECK-LABEL: test_mm512_maskz_srav_epi64 // CHECK: @llvm.x86.avx512.psrav.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srav_epi64(__U, __X, __Y); } __m512i test_mm512_srl_epi32(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_srl_epi32 + // CHECK-LABEL: test_mm512_srl_epi32 // CHECK: @llvm.x86.avx512.psrl.d.512 return _mm512_srl_epi32(__A, __B); } __m512i test_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_srl_epi32 + // CHECK-LABEL: test_mm512_mask_srl_epi32 // CHECK: @llvm.x86.avx512.psrl.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srl_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_srl_epi32 + // CHECK-LABEL: test_mm512_maskz_srl_epi32 // CHECK: @llvm.x86.avx512.psrl.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srl_epi32(__U, __A, __B); } __m512i test_mm512_srl_epi64(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_srl_epi64 + // CHECK-LABEL: test_mm512_srl_epi64 // CHECK: @llvm.x86.avx512.psrl.q.512 return _mm512_srl_epi64(__A, __B); } __m512i test_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: 
@test_mm512_mask_srl_epi64 + // CHECK-LABEL: test_mm512_mask_srl_epi64 // CHECK: @llvm.x86.avx512.psrl.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srl_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_srl_epi64 + // CHECK-LABEL: test_mm512_maskz_srl_epi64 // CHECK: @llvm.x86.avx512.psrl.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srl_epi64(__U, __A, __B); } __m512i test_mm512_srlv_epi32(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_srlv_epi32 + // CHECK-LABEL: test_mm512_srlv_epi32 // CHECK: @llvm.x86.avx512.psrlv.d.512 return _mm512_srlv_epi32(__X, __Y); } __m512i test_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_srlv_epi32 + // CHECK-LABEL: test_mm512_mask_srlv_epi32 // CHECK: @llvm.x86.avx512.psrlv.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srlv_epi32(__W, __U, __X, __Y); } __m512i test_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_srlv_epi32 + // CHECK-LABEL: test_mm512_maskz_srlv_epi32 // CHECK: @llvm.x86.avx512.psrlv.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srlv_epi32(__U, __X, __Y); } __m512i test_mm512_srlv_epi64(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_srlv_epi64 + // CHECK-LABEL: test_mm512_srlv_epi64 // CHECK: @llvm.x86.avx512.psrlv.q.512 return _mm512_srlv_epi64(__X, __Y); } __m512i test_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_srlv_epi64 + // CHECK-LABEL: test_mm512_mask_srlv_epi64 // CHECK: @llvm.x86.avx512.psrlv.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return 
_mm512_mask_srlv_epi64(__W, __U, __X, __Y); } __m512i test_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_srlv_epi64 + // CHECK-LABEL: test_mm512_maskz_srlv_epi64 // CHECK: @llvm.x86.avx512.psrlv.q.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srlv_epi64(__U, __X, __Y); } __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_ternarylogic_epi32 + // CHECK-LABEL: test_mm512_ternarylogic_epi32 // CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 240) return _mm512_ternarylogic_epi32(__A, __B, __C, _MM_TERNLOG_A); } __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_ternarylogic_epi32 + // CHECK-LABEL: test_mm512_mask_ternarylogic_epi32 // CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 204) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_ternarylogic_epi32(__A, __U, __B, __C, _MM_TERNLOG_B); } __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_ternarylogic_epi32 + // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32 // CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 170) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> zeroinitializer return _mm512_maskz_ternarylogic_epi32(__U, __A, __B, __C, _MM_TERNLOG_C); } __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_ternarylogic_epi64 + // CHECK-LABEL: test_mm512_ternarylogic_epi64 // CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 192) return _mm512_ternarylogic_epi64(__A, __B, __C, _MM_TERNLOG_A & _MM_TERNLOG_B); } __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_mask_ternarylogic_epi64 + // 
CHECK-LABEL: test_mm512_mask_ternarylogic_epi64 // CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 238) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_ternarylogic_epi64(__A, __U, __B, __C, _MM_TERNLOG_B | _MM_TERNLOG_C); } __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { - // CHECK-LABEL: @test_mm512_maskz_ternarylogic_epi64 + // CHECK-LABEL: test_mm512_maskz_ternarylogic_epi64 // CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 111) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> zeroinitializer return _mm512_maskz_ternarylogic_epi64(__U, __A, __B, __C, ~_MM_TERNLOG_A | (_MM_TERNLOG_B ^ _MM_TERNLOG_C)); } __m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_shuffle_f32x4 + // CHECK-LABEL: test_mm512_shuffle_f32x4 // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> return _mm512_shuffle_f32x4(__A, __B, 4); } __m512 test_mm512_mask_shuffle_f32x4(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_shuffle_f32x4 + // CHECK-LABEL: test_mm512_mask_shuffle_f32x4 // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_shuffle_f32x4(__W, __U, __A, __B, 4); } __m512 test_mm512_maskz_shuffle_f32x4(__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_f32x4 + // CHECK-LABEL: test_mm512_maskz_shuffle_f32x4 // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_shuffle_f32x4(__U, __A, __B, 4); } __m512d test_mm512_shuffle_f64x2(__m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_shuffle_f64x2 + // CHECK-LABEL: test_mm512_shuffle_f64x2 // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> 
%{{.*}}, <8 x i32> return _mm512_shuffle_f64x2(__A, __B, 4); } __m512d test_mm512_mask_shuffle_f64x2(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_shuffle_f64x2 + // CHECK-LABEL: test_mm512_mask_shuffle_f64x2 // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_shuffle_f64x2(__W, __U, __A, __B, 4); } __m512d test_mm512_maskz_shuffle_f64x2(__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_f64x2 + // CHECK-LABEL: test_mm512_maskz_shuffle_f64x2 // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_shuffle_f64x2(__U, __A, __B, 4); } __m512i test_mm512_shuffle_i32x4(__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_shuffle_i32x4 + // CHECK-LABEL: test_mm512_shuffle_i32x4 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> return _mm512_shuffle_i32x4(__A, __B, 4); } __m512i test_mm512_mask_shuffle_i32x4(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_shuffle_i32x4 + // CHECK-LABEL: test_mm512_mask_shuffle_i32x4 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shuffle_i32x4(__W, __U, __A, __B, 4); } __m512i test_mm512_maskz_shuffle_i32x4(__mmask16 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_i32x4 + // CHECK-LABEL: test_mm512_maskz_shuffle_i32x4 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shuffle_i32x4(__U, __A, __B, 4); } __m512i test_mm512_shuffle_i64x2(__m512i __A, __m512i __B) { - // CHECK-LABEL: 
@test_mm512_shuffle_i64x2 + // CHECK-LABEL: test_mm512_shuffle_i64x2 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> return _mm512_shuffle_i64x2(__A, __B, 4); } __m512i test_mm512_mask_shuffle_i64x2(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_shuffle_i64x2 + // CHECK-LABEL: test_mm512_mask_shuffle_i64x2 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shuffle_i64x2(__W, __U, __A, __B, 4); } __m512i test_mm512_maskz_shuffle_i64x2(__mmask8 __U, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_i64x2 + // CHECK-LABEL: test_mm512_maskz_shuffle_i64x2 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shuffle_i64x2(__U, __A, __B, 4); } __m512d test_mm512_shuffle_pd(__m512d __M, __m512d __V) { - // CHECK-LABEL: @test_mm512_shuffle_pd + // CHECK-LABEL: test_mm512_shuffle_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> return _mm512_shuffle_pd(__M, __V, 4); } __m512d test_mm512_mask_shuffle_pd(__m512d __W, __mmask8 __U, __m512d __M, __m512d __V) { - // CHECK-LABEL: @test_mm512_mask_shuffle_pd + // CHECK-LABEL: test_mm512_mask_shuffle_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_shuffle_pd(__W, __U, __M, __V, 4); } __m512d test_mm512_maskz_shuffle_pd(__mmask8 __U, __m512d __M, __m512d __V) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_pd + // CHECK-LABEL: test_mm512_maskz_shuffle_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_shuffle_pd(__U, __M, __V, 4); } __m512 
test_mm512_shuffle_ps(__m512 __M, __m512 __V) { - // CHECK-LABEL: @test_mm512_shuffle_ps + // CHECK-LABEL: test_mm512_shuffle_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> return _mm512_shuffle_ps(__M, __V, 4); } __m512 test_mm512_mask_shuffle_ps(__m512 __W, __mmask16 __U, __m512 __M, __m512 __V) { - // CHECK-LABEL: @test_mm512_mask_shuffle_ps + // CHECK-LABEL: test_mm512_mask_shuffle_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_shuffle_ps(__W, __U, __M, __V, 4); } __m512 test_mm512_maskz_shuffle_ps(__mmask16 __U, __m512 __M, __m512 __V) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_ps + // CHECK-LABEL: test_mm512_maskz_shuffle_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_shuffle_ps(__U, __M, __V, 4); } __m128d test_mm_sqrt_round_sd(__m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_sqrt_round_sd - // CHECK: call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 11) + // CHECK-LABEL: test_mm_sqrt_round_sd + // CHECK: call {{.*}}<2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 11) return _mm_sqrt_round_sd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_sqrt_sd + // CHECK-LABEL: test_mm_mask_sqrt_sd // CHECK: extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: call double @llvm.sqrt.f64(double %{{.*}}) // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 @@ -6178,13 +6180,13 @@ __m128d test_mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B } __m128d 
test_mm_mask_sqrt_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_sqrt_round_sd - // CHECK: call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 11) + // CHECK-LABEL: test_mm_mask_sqrt_round_sd + // CHECK: call {{.*}}<2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 11) return _mm_mask_sqrt_round_sd(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_sqrt_sd + // CHECK-LABEL: test_mm_maskz_sqrt_sd // CHECK: extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: call double @llvm.sqrt.f64(double %{{.*}}) // CHECK-NEXT: extractelement <2 x double> %{{.*}}, i64 0 @@ -6196,19 +6198,19 @@ __m128d test_mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B){ } __m128d test_mm_maskz_sqrt_round_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_sqrt_round_sd - // CHECK: call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 11) + // CHECK-LABEL: test_mm_maskz_sqrt_round_sd + // CHECK: call {{.*}}<2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 %{{.*}}, i32 11) return _mm_maskz_sqrt_round_sd(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_sqrt_round_ss(__m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_sqrt_round_ss - // CHECK: call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 11) + // CHECK-LABEL: test_mm_sqrt_round_ss + // CHECK: call {{.*}}<4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 -1, i32 11) return _mm_sqrt_round_ss(__A, __B, 
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_sqrt_ss + // CHECK-LABEL: test_mm_mask_sqrt_ss // CHECK: extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: call float @llvm.sqrt.f32(float %{{.*}}) // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 @@ -6220,13 +6222,13 @@ __m128 test_mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ } __m128 test_mm_mask_sqrt_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_sqrt_round_ss - // CHECK: call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 {{.*}}, i32 11) + // CHECK-LABEL: test_mm_mask_sqrt_round_ss + // CHECK: call {{.*}}<4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 {{.*}}, i32 11) return _mm_mask_sqrt_round_ss(__W,__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_sqrt_ss + // CHECK-LABEL: test_mm_maskz_sqrt_ss // CHECK: extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: call float @llvm.sqrt.f32(float %{{.*}}) // CHECK-NEXT: extractelement <4 x float> %{{.*}}, i64 0 @@ -6238,1163 +6240,1163 @@ __m128 test_mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B){ } __m128 test_mm_maskz_sqrt_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_sqrt_round_ss - // CHECK: call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 {{.*}}, i32 11) + // CHECK-LABEL: test_mm_maskz_sqrt_round_ss + // CHECK: call {{.*}}<4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 {{.*}}, i32 11) return _mm_maskz_sqrt_round_ss(__U,__A,__B,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m512 
test_mm512_broadcast_f32x4(float const* __A) { - // CHECK-LABEL: @test_mm512_broadcast_f32x4 + // CHECK-LABEL: test_mm512_broadcast_f32x4 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> return _mm512_broadcast_f32x4(_mm_loadu_ps(__A)); } __m512 test_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, float const* __A) { - // CHECK-LABEL: @test_mm512_mask_broadcast_f32x4 + // CHECK-LABEL: test_mm512_mask_broadcast_f32x4 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_broadcast_f32x4(__O, __M, _mm_loadu_ps(__A)); } __m512 test_mm512_maskz_broadcast_f32x4(__mmask16 __M, float const* __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x4 + // CHECK-LABEL: test_mm512_maskz_broadcast_f32x4 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_broadcast_f32x4(__M, _mm_loadu_ps(__A)); } __m512d test_mm512_broadcast_f64x4(double const* __A) { - // CHECK-LABEL: @test_mm512_broadcast_f64x4 + // CHECK-LABEL: test_mm512_broadcast_f64x4 // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> return _mm512_broadcast_f64x4(_mm256_loadu_pd(__A)); } __m512d test_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, double const* __A) { - // CHECK-LABEL: @test_mm512_mask_broadcast_f64x4 + // CHECK-LABEL: test_mm512_mask_broadcast_f64x4 // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_broadcast_f64x4(__O, __M, _mm256_loadu_pd(__A)); } __m512d test_mm512_maskz_broadcast_f64x4(__mmask8 __M, double const* __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcast_f64x4 + // CHECK-LABEL: test_mm512_maskz_broadcast_f64x4 // CHECK: shufflevector <4 x double> %{{.*}}, <4 x 
double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_broadcast_f64x4(__M, _mm256_loadu_pd(__A)); } __m512i test_mm512_broadcast_i32x4(__m128i const* __A) { - // CHECK-LABEL: @test_mm512_broadcast_i32x4 + // CHECK-LABEL: test_mm512_broadcast_i32x4 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> return _mm512_broadcast_i32x4(_mm_loadu_si128(__A)); } __m512i test_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i const* __A) { - // CHECK-LABEL: @test_mm512_mask_broadcast_i32x4 + // CHECK-LABEL: test_mm512_mask_broadcast_i32x4 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_broadcast_i32x4(__O, __M, _mm_loadu_si128(__A)); } __m512i test_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i const* __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x4 + // CHECK-LABEL: test_mm512_maskz_broadcast_i32x4 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A)); } __m512i test_mm512_broadcast_i64x4(__m256i const* __A) { - // CHECK-LABEL: @test_mm512_broadcast_i64x4 + // CHECK-LABEL: test_mm512_broadcast_i64x4 // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> return _mm512_broadcast_i64x4(_mm256_loadu_si256(__A)); } __m512i test_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i const* __A) { - // CHECK-LABEL: @test_mm512_mask_broadcast_i64x4 + // CHECK-LABEL: test_mm512_mask_broadcast_i64x4 // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_broadcast_i64x4(__O, __M, _mm256_loadu_si256(__A)); } __m512i test_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i 
const* __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcast_i64x4 + // CHECK-LABEL: test_mm512_maskz_broadcast_i64x4 // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_broadcast_i64x4(__M, _mm256_loadu_si256(__A)); } __m512d test_mm512_broadcastsd_pd(__m128d __A) { - // CHECK-LABEL: @test_mm512_broadcastsd_pd + // CHECK-LABEL: test_mm512_broadcastsd_pd // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> zeroinitializer return _mm512_broadcastsd_pd(__A); } __m512d test_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) { - // CHECK-LABEL: @test_mm512_mask_broadcastsd_pd + // CHECK-LABEL: test_mm512_mask_broadcastsd_pd // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> zeroinitializer // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_broadcastsd_pd(__O, __M, __A); } __m512d test_mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcastsd_pd + // CHECK-LABEL: test_mm512_maskz_broadcastsd_pd // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> zeroinitializer // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_broadcastsd_pd(__M, __A); } __m512 test_mm512_broadcastss_ps(__m128 __A) { - // CHECK-LABEL: @test_mm512_broadcastss_ps + // CHECK-LABEL: test_mm512_broadcastss_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> zeroinitializer return _mm512_broadcastss_ps(__A); } __m512 test_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) { - // CHECK-LABEL: @test_mm512_mask_broadcastss_ps + // CHECK-LABEL: test_mm512_mask_broadcastss_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> zeroinitializer // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x 
float> %{{.*}} return _mm512_mask_broadcastss_ps(__O, __M, __A); } __m512 test_mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcastss_ps + // CHECK-LABEL: test_mm512_maskz_broadcastss_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> zeroinitializer // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_broadcastss_ps(__M, __A); } __m512i test_mm512_broadcastd_epi32(__m128i __A) { - // CHECK-LABEL: @test_mm512_broadcastd_epi32 + // CHECK-LABEL: test_mm512_broadcastd_epi32 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> zeroinitializer return _mm512_broadcastd_epi32(__A); } __m512i test_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_broadcastd_epi32 + // CHECK-LABEL: test_mm512_mask_broadcastd_epi32 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> zeroinitializer // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_broadcastd_epi32(__O, __M, __A); } __m512i test_mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcastd_epi32 + // CHECK-LABEL: test_mm512_maskz_broadcastd_epi32 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> zeroinitializer // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_broadcastd_epi32(__M, __A); } __m512i test_mm512_broadcastq_epi64(__m128i __A) { - // CHECK-LABEL: @test_mm512_broadcastq_epi64 + // CHECK-LABEL: test_mm512_broadcastq_epi64 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> zeroinitializer return _mm512_broadcastq_epi64(__A); } __m512i test_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm512_mask_broadcastq_epi64 + // CHECK-LABEL: test_mm512_mask_broadcastq_epi64 // CHECK: 
shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> zeroinitializer // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_broadcastq_epi64(__O, __M, __A); } __m512i test_mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { - // CHECK-LABEL: @test_mm512_maskz_broadcastq_epi64 + // CHECK-LABEL: test_mm512_maskz_broadcastq_epi64 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> zeroinitializer // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_broadcastq_epi64(__M, __A); } __m128i test_mm512_cvtsepi32_epi8(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtsepi32_epi8 + // CHECK-LABEL: test_mm512_cvtsepi32_epi8 // CHECK: @llvm.x86.avx512.mask.pmovs.db.512 return _mm512_cvtsepi32_epi8(__A); } __m128i test_mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi32_epi8 + // CHECK-LABEL: test_mm512_mask_cvtsepi32_epi8 // CHECK: @llvm.x86.avx512.mask.pmovs.db.512 return _mm512_mask_cvtsepi32_epi8(__O, __M, __A); } __m128i test_mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtsepi32_epi8 + // CHECK-LABEL: test_mm512_maskz_cvtsepi32_epi8 // CHECK: @llvm.x86.avx512.mask.pmovs.db.512 return _mm512_maskz_cvtsepi32_epi8(__M, __A); } void test_mm512_mask_cvtsepi32_storeu_epi8(void * __P, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi32_storeu_epi8 + // CHECK-LABEL: test_mm512_mask_cvtsepi32_storeu_epi8 // CHECK: @llvm.x86.avx512.mask.pmovs.db.mem.512 return _mm512_mask_cvtsepi32_storeu_epi8(__P, __M, __A); } __m256i test_mm512_cvtsepi32_epi16(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtsepi32_epi16 + // CHECK-LABEL: test_mm512_cvtsepi32_epi16 // CHECK: @llvm.x86.avx512.mask.pmovs.dw.512 return _mm512_cvtsepi32_epi16(__A); } __m256i test_mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: 
@test_mm512_mask_cvtsepi32_epi16 + // CHECK-LABEL: test_mm512_mask_cvtsepi32_epi16 // CHECK: @llvm.x86.avx512.mask.pmovs.dw.512 return _mm512_mask_cvtsepi32_epi16(__O, __M, __A); } __m256i test_mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtsepi32_epi16 + // CHECK-LABEL: test_mm512_maskz_cvtsepi32_epi16 // CHECK: @llvm.x86.avx512.mask.pmovs.dw.512 return _mm512_maskz_cvtsepi32_epi16(__M, __A); } void test_mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi32_storeu_epi16 + // CHECK-LABEL: test_mm512_mask_cvtsepi32_storeu_epi16 // CHECK: @llvm.x86.avx512.mask.pmovs.dw.mem.512 return _mm512_mask_cvtsepi32_storeu_epi16(__P, __M, __A); } __m128i test_mm512_cvtsepi64_epi8(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtsepi64_epi8 + // CHECK-LABEL: test_mm512_cvtsepi64_epi8 // CHECK: @llvm.x86.avx512.mask.pmovs.qb.512 return _mm512_cvtsepi64_epi8(__A); } __m128i test_mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi64_epi8 + // CHECK-LABEL: test_mm512_mask_cvtsepi64_epi8 // CHECK: @llvm.x86.avx512.mask.pmovs.qb.512 return _mm512_mask_cvtsepi64_epi8(__O, __M, __A); } __m128i test_mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtsepi64_epi8 + // CHECK-LABEL: test_mm512_maskz_cvtsepi64_epi8 // CHECK: @llvm.x86.avx512.mask.pmovs.qb.512 return _mm512_maskz_cvtsepi64_epi8(__M, __A); } void test_mm512_mask_cvtsepi64_storeu_epi8(void * __P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi64_storeu_epi8 + // CHECK-LABEL: test_mm512_mask_cvtsepi64_storeu_epi8 // CHECK: @llvm.x86.avx512.mask.pmovs.qb.mem.512 return _mm512_mask_cvtsepi64_storeu_epi8(__P, __M, __A); } __m256i test_mm512_cvtsepi64_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtsepi64_epi32 + // CHECK-LABEL: test_mm512_cvtsepi64_epi32 // CHECK: 
@llvm.x86.avx512.mask.pmovs.qd.512 return _mm512_cvtsepi64_epi32(__A); } __m256i test_mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi64_epi32 + // CHECK-LABEL: test_mm512_mask_cvtsepi64_epi32 // CHECK: @llvm.x86.avx512.mask.pmovs.qd.512 return _mm512_mask_cvtsepi64_epi32(__O, __M, __A); } __m256i test_mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtsepi64_epi32 + // CHECK-LABEL: test_mm512_maskz_cvtsepi64_epi32 // CHECK: @llvm.x86.avx512.mask.pmovs.qd.512 return _mm512_maskz_cvtsepi64_epi32(__M, __A); } void test_mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi64_storeu_epi32 + // CHECK-LABEL: test_mm512_mask_cvtsepi64_storeu_epi32 // CHECK: @llvm.x86.avx512.mask.pmovs.qd.mem.512 return _mm512_mask_cvtsepi64_storeu_epi32(__P, __M, __A); } __m128i test_mm512_cvtsepi64_epi16(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtsepi64_epi16 + // CHECK-LABEL: test_mm512_cvtsepi64_epi16 // CHECK: @llvm.x86.avx512.mask.pmovs.qw.512 return _mm512_cvtsepi64_epi16(__A); } __m128i test_mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi64_epi16 + // CHECK-LABEL: test_mm512_mask_cvtsepi64_epi16 // CHECK: @llvm.x86.avx512.mask.pmovs.qw.512 return _mm512_mask_cvtsepi64_epi16(__O, __M, __A); } __m128i test_mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtsepi64_epi16 + // CHECK-LABEL: test_mm512_maskz_cvtsepi64_epi16 // CHECK: @llvm.x86.avx512.mask.pmovs.qw.512 return _mm512_maskz_cvtsepi64_epi16(__M, __A); } void test_mm512_mask_cvtsepi64_storeu_epi16(void * __P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtsepi64_storeu_epi16 + // CHECK-LABEL: test_mm512_mask_cvtsepi64_storeu_epi16 // CHECK: @llvm.x86.avx512.mask.pmovs.qw.mem.512 return _mm512_mask_cvtsepi64_storeu_epi16(__P, 
__M, __A); } __m128i test_mm512_cvtusepi32_epi8(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtusepi32_epi8 + // CHECK-LABEL: test_mm512_cvtusepi32_epi8 // CHECK: @llvm.x86.avx512.mask.pmovus.db.512 return _mm512_cvtusepi32_epi8(__A); } __m128i test_mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi32_epi8 + // CHECK-LABEL: test_mm512_mask_cvtusepi32_epi8 // CHECK: @llvm.x86.avx512.mask.pmovus.db.512 return _mm512_mask_cvtusepi32_epi8(__O, __M, __A); } __m128i test_mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtusepi32_epi8 + // CHECK-LABEL: test_mm512_maskz_cvtusepi32_epi8 // CHECK: @llvm.x86.avx512.mask.pmovus.db.512 return _mm512_maskz_cvtusepi32_epi8(__M, __A); } void test_mm512_mask_cvtusepi32_storeu_epi8(void * __P, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi32_storeu_epi8 + // CHECK-LABEL: test_mm512_mask_cvtusepi32_storeu_epi8 // CHECK: @llvm.x86.avx512.mask.pmovus.db.mem.512 return _mm512_mask_cvtusepi32_storeu_epi8(__P, __M, __A); } __m256i test_mm512_cvtusepi32_epi16(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtusepi32_epi16 + // CHECK-LABEL: test_mm512_cvtusepi32_epi16 // CHECK: @llvm.x86.avx512.mask.pmovus.dw.512 return _mm512_cvtusepi32_epi16(__A); } __m256i test_mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi32_epi16 + // CHECK-LABEL: test_mm512_mask_cvtusepi32_epi16 // CHECK: @llvm.x86.avx512.mask.pmovus.dw.512 return _mm512_mask_cvtusepi32_epi16(__O, __M, __A); } __m256i test_mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtusepi32_epi16 + // CHECK-LABEL: test_mm512_maskz_cvtusepi32_epi16 // CHECK: @llvm.x86.avx512.mask.pmovus.dw.512 return _mm512_maskz_cvtusepi32_epi16(__M, __A); } void test_mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: 
@test_mm512_mask_cvtusepi32_storeu_epi16 + // CHECK-LABEL: test_mm512_mask_cvtusepi32_storeu_epi16 // CHECK: @llvm.x86.avx512.mask.pmovus.dw.mem.512 return _mm512_mask_cvtusepi32_storeu_epi16(__P, __M, __A); } __m128i test_mm512_cvtusepi64_epi8(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtusepi64_epi8 + // CHECK-LABEL: test_mm512_cvtusepi64_epi8 // CHECK: @llvm.x86.avx512.mask.pmovus.qb.512 return _mm512_cvtusepi64_epi8(__A); } __m128i test_mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi64_epi8 + // CHECK-LABEL: test_mm512_mask_cvtusepi64_epi8 // CHECK: @llvm.x86.avx512.mask.pmovus.qb.512 return _mm512_mask_cvtusepi64_epi8(__O, __M, __A); } __m128i test_mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtusepi64_epi8 + // CHECK-LABEL: test_mm512_maskz_cvtusepi64_epi8 // CHECK: @llvm.x86.avx512.mask.pmovus.qb.512 return _mm512_maskz_cvtusepi64_epi8(__M, __A); } void test_mm512_mask_cvtusepi64_storeu_epi8(void * __P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi64_storeu_epi8 + // CHECK-LABEL: test_mm512_mask_cvtusepi64_storeu_epi8 // CHECK: @llvm.x86.avx512.mask.pmovus.qb.mem.512 return _mm512_mask_cvtusepi64_storeu_epi8(__P, __M, __A); } __m256i test_mm512_cvtusepi64_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtusepi64_epi32 + // CHECK-LABEL: test_mm512_cvtusepi64_epi32 // CHECK: @llvm.x86.avx512.mask.pmovus.qd.512 return _mm512_cvtusepi64_epi32(__A); } __m256i test_mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi64_epi32 + // CHECK-LABEL: test_mm512_mask_cvtusepi64_epi32 // CHECK: @llvm.x86.avx512.mask.pmovus.qd.512 return _mm512_mask_cvtusepi64_epi32(__O, __M, __A); } __m256i test_mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtusepi64_epi32 + // CHECK-LABEL: test_mm512_maskz_cvtusepi64_epi32 // CHECK: 
@llvm.x86.avx512.mask.pmovus.qd.512 return _mm512_maskz_cvtusepi64_epi32(__M, __A); } void test_mm512_mask_cvtusepi64_storeu_epi32(void* __P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi64_storeu_epi32 + // CHECK-LABEL: test_mm512_mask_cvtusepi64_storeu_epi32 // CHECK: @llvm.x86.avx512.mask.pmovus.qd.mem.512 return _mm512_mask_cvtusepi64_storeu_epi32(__P, __M, __A); } __m128i test_mm512_cvtusepi64_epi16(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtusepi64_epi16 + // CHECK-LABEL: test_mm512_cvtusepi64_epi16 // CHECK: @llvm.x86.avx512.mask.pmovus.qw.512 return _mm512_cvtusepi64_epi16(__A); } __m128i test_mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi64_epi16 + // CHECK-LABEL: test_mm512_mask_cvtusepi64_epi16 // CHECK: @llvm.x86.avx512.mask.pmovus.qw.512 return _mm512_mask_cvtusepi64_epi16(__O, __M, __A); } __m128i test_mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtusepi64_epi16 + // CHECK-LABEL: test_mm512_maskz_cvtusepi64_epi16 // CHECK: @llvm.x86.avx512.mask.pmovus.qw.512 return _mm512_maskz_cvtusepi64_epi16(__M, __A); } void test_mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtusepi64_storeu_epi16 + // CHECK-LABEL: test_mm512_mask_cvtusepi64_storeu_epi16 // CHECK: @llvm.x86.avx512.mask.pmovus.qw.mem.512 return _mm512_mask_cvtusepi64_storeu_epi16(__P, __M, __A); } __m128i test_mm512_cvtepi32_epi8(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi32_epi8 + // CHECK-LABEL: test_mm512_cvtepi32_epi8 // CHECK: trunc <16 x i32> %{{.*}} to <16 x i8> return _mm512_cvtepi32_epi8(__A); } __m128i test_mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_epi8 + // CHECK-LABEL: test_mm512_mask_cvtepi32_epi8 // CHECK: @llvm.x86.avx512.mask.pmov.db.512 return _mm512_mask_cvtepi32_epi8(__O, __M, __A); } __m128i 
test_mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi32_epi8 + // CHECK-LABEL: test_mm512_maskz_cvtepi32_epi8 // CHECK: @llvm.x86.avx512.mask.pmov.db.512 return _mm512_maskz_cvtepi32_epi8(__M, __A); } void test_mm512_mask_cvtepi32_storeu_epi8(void * __P, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_storeu_epi8 + // CHECK-LABEL: test_mm512_mask_cvtepi32_storeu_epi8 // CHECK: @llvm.x86.avx512.mask.pmov.db.mem.512 return _mm512_mask_cvtepi32_storeu_epi8(__P, __M, __A); } __m256i test_mm512_cvtepi32_epi16(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi32_epi16 + // CHECK-LABEL: test_mm512_cvtepi32_epi16 // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16> return _mm512_cvtepi32_epi16(__A); } __m256i test_mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_epi16 + // CHECK-LABEL: test_mm512_mask_cvtepi32_epi16 // CHECK: @llvm.x86.avx512.mask.pmov.dw.512 return _mm512_mask_cvtepi32_epi16(__O, __M, __A); } __m256i test_mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi32_epi16 + // CHECK-LABEL: test_mm512_maskz_cvtepi32_epi16 // CHECK: @llvm.x86.avx512.mask.pmov.dw.512 return _mm512_maskz_cvtepi32_epi16(__M, __A); } void test_mm512_mask_cvtepi32_storeu_epi16(void * __P, __mmask16 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_storeu_epi16 + // CHECK-LABEL: test_mm512_mask_cvtepi32_storeu_epi16 // CHECK: @llvm.x86.avx512.mask.pmov.dw.mem.512 return _mm512_mask_cvtepi32_storeu_epi16(__P, __M, __A); } __m128i test_mm512_cvtepi64_epi8(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi64_epi8 + // CHECK-LABEL: test_mm512_cvtepi64_epi8 // CHECK: @llvm.x86.avx512.mask.pmov.qb.512 return _mm512_cvtepi64_epi8(__A); } __m128i test_mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi64_epi8 + // CHECK-LABEL: 
test_mm512_mask_cvtepi64_epi8 // CHECK: @llvm.x86.avx512.mask.pmov.qb.512 return _mm512_mask_cvtepi64_epi8(__O, __M, __A); } __m128i test_mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi64_epi8 + // CHECK-LABEL: test_mm512_maskz_cvtepi64_epi8 // CHECK: @llvm.x86.avx512.mask.pmov.qb.512 return _mm512_maskz_cvtepi64_epi8(__M, __A); } void test_mm512_mask_cvtepi64_storeu_epi8(void * __P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi64_storeu_epi8 + // CHECK-LABEL: test_mm512_mask_cvtepi64_storeu_epi8 // CHECK: @llvm.x86.avx512.mask.pmov.qb.mem.512 return _mm512_mask_cvtepi64_storeu_epi8(__P, __M, __A); } __m256i test_mm512_cvtepi64_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi64_epi32 + // CHECK-LABEL: test_mm512_cvtepi64_epi32 // CHECK: trunc <8 x i64> %{{.*}} to <8 x i32> return _mm512_cvtepi64_epi32(__A); } __m256i test_mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi64_epi32 + // CHECK-LABEL: test_mm512_mask_cvtepi64_epi32 // CHECK: trunc <8 x i64> %{{.*}} to <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm512_mask_cvtepi64_epi32(__O, __M, __A); } __m256i test_mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi64_epi32 + // CHECK-LABEL: test_mm512_maskz_cvtepi64_epi32 // CHECK: trunc <8 x i64> %{{.*}} to <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm512_maskz_cvtepi64_epi32(__M, __A); } void test_mm512_mask_cvtepi64_storeu_epi32(void* __P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi64_storeu_epi32 + // CHECK-LABEL: test_mm512_mask_cvtepi64_storeu_epi32 // CHECK: @llvm.x86.avx512.mask.pmov.qd.mem.512 return _mm512_mask_cvtepi64_storeu_epi32(__P, __M, __A); } __m128i test_mm512_cvtepi64_epi16(__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi64_epi16 + // 
CHECK-LABEL: test_mm512_cvtepi64_epi16 // CHECK: trunc <8 x i64> %{{.*}} to <8 x i16> return _mm512_cvtepi64_epi16(__A); } __m128i test_mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi64_epi16 + // CHECK-LABEL: test_mm512_mask_cvtepi64_epi16 // CHECK: @llvm.x86.avx512.mask.pmov.qw.512 return _mm512_mask_cvtepi64_epi16(__O, __M, __A); } __m128i test_mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi64_epi16 + // CHECK-LABEL: test_mm512_maskz_cvtepi64_epi16 // CHECK: @llvm.x86.avx512.mask.pmov.qw.512 return _mm512_maskz_cvtepi64_epi16(__M, __A); } void test_mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi64_storeu_epi16 + // CHECK-LABEL: test_mm512_mask_cvtepi64_storeu_epi16 // CHECK: @llvm.x86.avx512.mask.pmov.qw.mem.512 return _mm512_mask_cvtepi64_storeu_epi16(__P, __M, __A); } __m128i test_mm512_extracti32x4_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_extracti32x4_epi32 + // CHECK-LABEL: test_mm512_extracti32x4_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <4 x i32> return _mm512_extracti32x4_epi32(__A, 3); } __m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_extracti32x4_epi32 + // CHECK-LABEL: test_mm512_mask_extracti32x4_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 3); } __m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_extracti32x4_epi32 + // CHECK-LABEL: test_mm512_maskz_extracti32x4_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm512_maskz_extracti32x4_epi32(__U, 
__A, 3); } __m256i test_mm512_extracti64x4_epi64(__m512i __A) { - // CHECK-LABEL: @test_mm512_extracti64x4_epi64 + // CHECK-LABEL: test_mm512_extracti64x4_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <4 x i32> return _mm512_extracti64x4_epi64(__A, 1); } __m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_extracti64x4_epi64 + // CHECK-LABEL: test_mm512_mask_extracti64x4_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 1); } __m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_extracti64x4_epi64 + // CHECK-LABEL: test_mm512_maskz_extracti64x4_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm512_maskz_extracti64x4_epi64(__U, __A, 1); } __m512d test_mm512_insertf64x4(__m512d __A, __m256d __B) { - // CHECK-LABEL: @test_mm512_insertf64x4 + // CHECK-LABEL: test_mm512_insertf64x4 // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> return _mm512_insertf64x4(__A, __B, 1); } __m512d test_mm512_mask_insertf64x4(__m512d __W, __mmask8 __U, __m512d __A, __m256d __B) { - // CHECK-LABEL: @test_mm512_mask_insertf64x4 + // CHECK-LABEL: test_mm512_mask_insertf64x4 // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_insertf64x4(__W, __U, __A, __B, 1); } __m512d test_mm512_maskz_insertf64x4(__mmask8 __U, __m512d __A, __m256d __B) { - // CHECK-LABEL: @test_mm512_maskz_insertf64x4 + // CHECK-LABEL: test_mm512_maskz_insertf64x4 // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 
x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_insertf64x4(__U, __A, __B, 1); } __m512i test_mm512_inserti64x4(__m512i __A, __m256i __B) { - // CHECK-LABEL: @test_mm512_inserti64x4 + // CHECK-LABEL: test_mm512_inserti64x4 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> return _mm512_inserti64x4(__A, __B, 1); } __m512i test_mm512_mask_inserti64x4(__m512i __W, __mmask8 __U, __m512i __A, __m256i __B) { - // CHECK-LABEL: @test_mm512_mask_inserti64x4 + // CHECK-LABEL: test_mm512_mask_inserti64x4 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_inserti64x4(__W, __U, __A, __B, 1); } __m512i test_mm512_maskz_inserti64x4(__mmask8 __U, __m512i __A, __m256i __B) { - // CHECK-LABEL: @test_mm512_maskz_inserti64x4 + // CHECK-LABEL: test_mm512_maskz_inserti64x4 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_inserti64x4(__U, __A, __B, 1); } __m512 test_mm512_insertf32x4(__m512 __A, __m128 __B) { - // CHECK-LABEL: @test_mm512_insertf32x4 + // CHECK-LABEL: test_mm512_insertf32x4 // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> return _mm512_insertf32x4(__A, __B, 1); } __m512 test_mm512_mask_insertf32x4(__m512 __W, __mmask16 __U, __m512 __A, __m128 __B) { - // CHECK-LABEL: @test_mm512_mask_insertf32x4 + // CHECK-LABEL: test_mm512_mask_insertf32x4 // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_insertf32x4(__W, __U, __A, __B, 1); } __m512 test_mm512_maskz_insertf32x4(__mmask16 __U, __m512 __A, __m128 __B) { - // CHECK-LABEL: @test_mm512_maskz_insertf32x4 + // CHECK-LABEL: test_mm512_maskz_insertf32x4 // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> 
%{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_insertf32x4(__U, __A, __B, 1); } __m512i test_mm512_inserti32x4(__m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_inserti32x4 + // CHECK-LABEL: test_mm512_inserti32x4 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> return _mm512_inserti32x4(__A, __B, 1); } __m512i test_mm512_mask_inserti32x4(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_mask_inserti32x4 + // CHECK-LABEL: test_mm512_mask_inserti32x4 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_inserti32x4(__W, __U, __A, __B, 1); } __m512i test_mm512_maskz_inserti32x4(__mmask16 __U, __m512i __A, __m128i __B) { - // CHECK-LABEL: @test_mm512_maskz_inserti32x4 + // CHECK-LABEL: test_mm512_maskz_inserti32x4 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_inserti32x4(__U, __A, __B, 1); } __m512d test_mm512_getmant_round_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_getmant_round_pd + // CHECK-LABEL: test_mm512_getmant_round_pd // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 return _mm512_getmant_round_pd(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_getmant_round_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_getmant_round_pd + // CHECK-LABEL: test_mm512_mask_getmant_round_pd // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 return _mm512_mask_getmant_round_pd(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_getmant_round_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_getmant_round_pd + // CHECK-LABEL: test_mm512_maskz_getmant_round_pd // CHECK: 
@llvm.x86.avx512.mask.getmant.pd.512 return _mm512_maskz_getmant_round_pd(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); } __m512d test_mm512_getmant_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_getmant_pd + // CHECK-LABEL: test_mm512_getmant_pd // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 return _mm512_getmant_pd(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m512d test_mm512_mask_getmant_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_getmant_pd + // CHECK-LABEL: test_mm512_mask_getmant_pd // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 return _mm512_mask_getmant_pd(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m512d test_mm512_maskz_getmant_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_getmant_pd + // CHECK-LABEL: test_mm512_maskz_getmant_pd // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 return _mm512_maskz_getmant_pd(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m512 test_mm512_getmant_round_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_getmant_round_ps + // CHECK-LABEL: test_mm512_getmant_round_ps // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 return _mm512_getmant_round_ps(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_getmant_round_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_getmant_round_ps + // CHECK-LABEL: test_mm512_mask_getmant_round_ps // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 return _mm512_mask_getmant_round_ps(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_getmant_round_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_getmant_round_ps + // CHECK-LABEL: test_mm512_maskz_getmant_round_ps // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 return _mm512_maskz_getmant_round_ps(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); } __m512 test_mm512_getmant_ps(__m512 __A) { - // CHECK-LABEL: 
@test_mm512_getmant_ps + // CHECK-LABEL: test_mm512_getmant_ps // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 return _mm512_getmant_ps(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m512 test_mm512_mask_getmant_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_getmant_ps + // CHECK-LABEL: test_mm512_mask_getmant_ps // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 return _mm512_mask_getmant_ps(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m512 test_mm512_maskz_getmant_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_getmant_ps + // CHECK-LABEL: test_mm512_maskz_getmant_ps // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 return _mm512_maskz_getmant_ps(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m512d test_mm512_getexp_round_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_getexp_round_pd + // CHECK-LABEL: test_mm512_getexp_round_pd // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 return _mm512_getexp_round_pd(__A, _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_getexp_round_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_getexp_round_pd + // CHECK-LABEL: test_mm512_mask_getexp_round_pd // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 return _mm512_mask_getexp_round_pd(__W, __U, __A, _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_getexp_round_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_getexp_round_pd + // CHECK-LABEL: test_mm512_maskz_getexp_round_pd // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 return _mm512_maskz_getexp_round_pd(__U, __A, _MM_FROUND_NO_EXC); } __m512d test_mm512_getexp_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_getexp_pd + // CHECK-LABEL: test_mm512_getexp_pd // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 return _mm512_getexp_pd(__A); } __m512d test_mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_getexp_pd + // CHECK-LABEL: test_mm512_mask_getexp_pd // CHECK: 
@llvm.x86.avx512.mask.getexp.pd.512 return _mm512_mask_getexp_pd(__W, __U, __A); } __m512d test_mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_getexp_pd + // CHECK-LABEL: test_mm512_maskz_getexp_pd // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 return _mm512_maskz_getexp_pd(__U, __A); } __m512 test_mm512_getexp_round_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_getexp_round_ps + // CHECK-LABEL: test_mm512_getexp_round_ps // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 return _mm512_getexp_round_ps(__A, _MM_FROUND_NO_EXC); } __m512 test_mm512_mask_getexp_round_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_getexp_round_ps + // CHECK-LABEL: test_mm512_mask_getexp_round_ps // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 return _mm512_mask_getexp_round_ps(__W, __U, __A, _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_getexp_round_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_getexp_round_ps + // CHECK-LABEL: test_mm512_maskz_getexp_round_ps // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 return _mm512_maskz_getexp_round_ps(__U, __A, _MM_FROUND_NO_EXC); } __m512 test_mm512_getexp_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_getexp_ps + // CHECK-LABEL: test_mm512_getexp_ps // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 return _mm512_getexp_ps(__A); } __m512 test_mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_getexp_ps + // CHECK-LABEL: test_mm512_mask_getexp_ps // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 return _mm512_mask_getexp_ps(__W, __U, __A); } __m512 test_mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_getexp_ps + // CHECK-LABEL: test_mm512_maskz_getexp_ps // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 return _mm512_maskz_getexp_ps(__U, __A); } __m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i64gather_ps + // CHECK-LABEL: 
test_mm512_i64gather_ps // CHECK: @llvm.x86.avx512.mask.gather.qps.512 return _mm512_i64gather_ps(__index, __addr, 2); } __m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i64gather_ps + // CHECK-LABEL: test_mm512_mask_i64gather_ps // CHECK: @llvm.x86.avx512.mask.gather.qps.512 return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); } __m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i64gather_epi32 + // CHECK-LABEL: test_mm512_i64gather_epi32 // CHECK: @llvm.x86.avx512.mask.gather.qpi.512 return _mm512_i64gather_epi32(__index, __addr, 2); } __m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i64gather_epi32 + // CHECK-LABEL: test_mm512_mask_i64gather_epi32 // CHECK: @llvm.x86.avx512.mask.gather.qpi.512 return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i64gather_pd + // CHECK-LABEL: test_mm512_i64gather_pd // CHECK: @llvm.x86.avx512.mask.gather.qpd.512 return _mm512_i64gather_pd(__index, __addr, 2); } __m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i64gather_pd + // CHECK-LABEL: test_mm512_mask_i64gather_pd // CHECK: @llvm.x86.avx512.mask.gather.qpd.512 return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); } __m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i64gather_epi64 + // CHECK-LABEL: test_mm512_i64gather_epi64 // CHECK: @llvm.x86.avx512.mask.gather.qpq.512 return _mm512_i64gather_epi64(__index, __addr, 2); } __m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const 
*__addr) { - // CHECK-LABEL: @test_mm512_mask_i64gather_epi64 + // CHECK-LABEL: test_mm512_mask_i64gather_epi64 // CHECK: @llvm.x86.avx512.mask.gather.qpq.512 return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); } __m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i32gather_ps + // CHECK-LABEL: test_mm512_i32gather_ps // CHECK: @llvm.x86.avx512.mask.gather.dps.512 return _mm512_i32gather_ps(__index, __addr, 2); } __m512 test_mm512_mask_i32gather_ps(__m512 v1_old, __mmask16 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i32gather_ps + // CHECK-LABEL: test_mm512_mask_i32gather_ps // CHECK: @llvm.x86.avx512.mask.gather.dps.512 return _mm512_mask_i32gather_ps(v1_old, __mask, __index, __addr, 2); } __m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i32gather_epi32 + // CHECK-LABEL: test_mm512_i32gather_epi32 // CHECK: @llvm.x86.avx512.mask.gather.dpi.512 return _mm512_i32gather_epi32(__index, __addr, 2); } __m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i32gather_epi32 + // CHECK-LABEL: test_mm512_mask_i32gather_epi32 // CHECK: @llvm.x86.avx512.mask.gather.dpi.512 return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); } __m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i32gather_pd + // CHECK-LABEL: test_mm512_i32gather_pd // CHECK: @llvm.x86.avx512.mask.gather.dpd.512 return _mm512_i32gather_pd(__index, __addr, 2); } __m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i32gather_pd + // CHECK-LABEL: test_mm512_mask_i32gather_pd // CHECK: @llvm.x86.avx512.mask.gather.dpd.512 return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); } __m512i 
test_mm512_i32gather_epi64(__m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i32gather_epi64 + // CHECK-LABEL: test_mm512_i32gather_epi64 // CHECK: @llvm.x86.avx512.mask.gather.dpq.512 return _mm512_i32gather_epi64(__index, __addr, 2); } __m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i32gather_epi64 + // CHECK-LABEL: test_mm512_mask_i32gather_epi64 // CHECK: @llvm.x86.avx512.mask.gather.dpq.512 return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); } void test_mm512_i64scatter_ps(void *__addr, __m512i __index, __m256 __v1) { - // CHECK-LABEL: @test_mm512_i64scatter_ps + // CHECK-LABEL: test_mm512_i64scatter_ps // CHECK: @llvm.x86.avx512.mask.scatter.qps.512 return _mm512_i64scatter_ps(__addr, __index, __v1, 2); } void test_mm512_mask_i64scatter_ps(void *__addr, __mmask8 __mask, __m512i __index, __m256 __v1) { - // CHECK-LABEL: @test_mm512_mask_i64scatter_ps + // CHECK-LABEL: test_mm512_mask_i64scatter_ps // CHECK: @llvm.x86.avx512.mask.scatter.qps.512 return _mm512_mask_i64scatter_ps(__addr, __mask, __index, __v1, 2); } void test_mm512_i64scatter_epi32(void *__addr, __m512i __index, __m256i __v1) { - // CHECK-LABEL: @test_mm512_i64scatter_epi32 + // CHECK-LABEL: test_mm512_i64scatter_epi32 // CHECK: @llvm.x86.avx512.mask.scatter.qpi.512 return _mm512_i64scatter_epi32(__addr, __index, __v1, 2); } void test_mm512_mask_i64scatter_epi32(void *__addr, __mmask8 __mask, __m512i __index, __m256i __v1) { - // CHECK-LABEL: @test_mm512_mask_i64scatter_epi32 + // CHECK-LABEL: test_mm512_mask_i64scatter_epi32 // CHECK: @llvm.x86.avx512.mask.scatter.qpi.512 return _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, 2); } void test_mm512_i64scatter_pd(void *__addr, __m512i __index, __m512d __v1) { - // CHECK-LABEL: @test_mm512_i64scatter_pd + // CHECK-LABEL: test_mm512_i64scatter_pd // CHECK: @llvm.x86.avx512.mask.scatter.qpd.512 
return _mm512_i64scatter_pd(__addr, __index, __v1, 2); } void test_mm512_mask_i64scatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) { - // CHECK-LABEL: @test_mm512_mask_i64scatter_pd + // CHECK-LABEL: test_mm512_mask_i64scatter_pd // CHECK: @llvm.x86.avx512.mask.scatter.qpd.512 return _mm512_mask_i64scatter_pd(__addr, __mask, __index, __v1, 2); } void test_mm512_i64scatter_epi64(void *__addr, __m512i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_i64scatter_epi64 + // CHECK-LABEL: test_mm512_i64scatter_epi64 // CHECK: @llvm.x86.avx512.mask.scatter.qpq.512 return _mm512_i64scatter_epi64(__addr, __index, __v1, 2); } void test_mm512_mask_i64scatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_mask_i64scatter_epi64 + // CHECK-LABEL: test_mm512_mask_i64scatter_epi64 // CHECK: @llvm.x86.avx512.mask.scatter.qpq.512 return _mm512_mask_i64scatter_epi64(__addr, __mask, __index, __v1, 2); } void test_mm512_i32scatter_ps(void *__addr, __m512i __index, __m512 __v1) { - // CHECK-LABEL: @test_mm512_i32scatter_ps + // CHECK-LABEL: test_mm512_i32scatter_ps // CHECK: @llvm.x86.avx512.mask.scatter.dps.512 return _mm512_i32scatter_ps(__addr, __index, __v1, 2); } void test_mm512_mask_i32scatter_ps(void *__addr, __mmask16 __mask, __m512i __index, __m512 __v1) { - // CHECK-LABEL: @test_mm512_mask_i32scatter_ps + // CHECK-LABEL: test_mm512_mask_i32scatter_ps // CHECK: @llvm.x86.avx512.mask.scatter.dps.512 return _mm512_mask_i32scatter_ps(__addr, __mask, __index, __v1, 2); } void test_mm512_i32scatter_epi32(void *__addr, __m512i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_i32scatter_epi32 + // CHECK-LABEL: test_mm512_i32scatter_epi32 // CHECK: @llvm.x86.avx512.mask.scatter.dpi.512 return _mm512_i32scatter_epi32(__addr, __index, __v1, 2); } void test_mm512_mask_i32scatter_epi32(void *__addr, __mmask16 __mask, __m512i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_mask_i32scatter_epi32 + 
// CHECK-LABEL: test_mm512_mask_i32scatter_epi32 // CHECK: @llvm.x86.avx512.mask.scatter.dpi.512 return _mm512_mask_i32scatter_epi32(__addr, __mask, __index, __v1, 2); } void test_mm512_i32scatter_pd(void *__addr, __m256i __index, __m512d __v1) { - // CHECK-LABEL: @test_mm512_i32scatter_pd + // CHECK-LABEL: test_mm512_i32scatter_pd // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512 return _mm512_i32scatter_pd(__addr, __index, __v1, 2); } void test_mm512_mask_i32scatter_pd(void *__addr, __mmask8 __mask, __m256i __index, __m512d __v1) { - // CHECK-LABEL: @test_mm512_mask_i32scatter_pd + // CHECK-LABEL: test_mm512_mask_i32scatter_pd // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512 return _mm512_mask_i32scatter_pd(__addr, __mask, __index, __v1, 2); } void test_mm512_i32scatter_epi64(void *__addr, __m256i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_i32scatter_epi64 + // CHECK-LABEL: test_mm512_i32scatter_epi64 // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512 return _mm512_i32scatter_epi64(__addr, __index, __v1, 2); } void test_mm512_mask_i32scatter_epi64(void *__addr, __mmask8 __mask, __m256i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_mask_i32scatter_epi64 + // CHECK-LABEL: test_mm512_mask_i32scatter_epi64 // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512 return _mm512_mask_i32scatter_epi64(__addr, __mask, __index, __v1, 2); } __m128d test_mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_rsqrt14_sd + // CHECK-LABEL: test_mm_mask_rsqrt14_sd // CHECK: @llvm.x86.avx512.rsqrt14.sd return _mm_mask_rsqrt14_sd(__W, __U, __A, __B); } __m128d test_mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_rsqrt14_sd + // CHECK-LABEL: test_mm_maskz_rsqrt14_sd // CHECK: @llvm.x86.avx512.rsqrt14.sd return _mm_maskz_rsqrt14_sd(__U, __A, __B); } __m128 test_mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_rsqrt14_ss + // 
CHECK-LABEL: test_mm_mask_rsqrt14_ss // CHECK: @llvm.x86.avx512.rsqrt14.ss return _mm_mask_rsqrt14_ss(__W, __U, __A, __B); } __m128 test_mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_rsqrt14_ss + // CHECK-LABEL: test_mm_maskz_rsqrt14_ss // CHECK: @llvm.x86.avx512.rsqrt14.ss return _mm_maskz_rsqrt14_ss(__U, __A, __B); } __m512d test_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_rcp14_pd + // CHECK-LABEL: test_mm512_mask_rcp14_pd // CHECK: @llvm.x86.avx512.rcp14.pd.512 return _mm512_mask_rcp14_pd (__W,__U,__A); } __m512d test_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_rcp14_pd + // CHECK-LABEL: test_mm512_maskz_rcp14_pd // CHECK: @llvm.x86.avx512.rcp14.pd.512 return _mm512_maskz_rcp14_pd (__U,__A); } __m512 test_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_rcp14_ps + // CHECK-LABEL: test_mm512_mask_rcp14_ps // CHECK: @llvm.x86.avx512.rcp14.ps.512 return _mm512_mask_rcp14_ps (__W,__U,__A); } __m512 test_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_rcp14_ps + // CHECK-LABEL: test_mm512_maskz_rcp14_ps // CHECK: @llvm.x86.avx512.rcp14.ps.512 return _mm512_maskz_rcp14_ps (__U,__A); } __m128d test_mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_rcp14_sd + // CHECK-LABEL: test_mm_mask_rcp14_sd // CHECK: @llvm.x86.avx512.rcp14.sd return _mm_mask_rcp14_sd(__W, __U, __A, __B); } __m128d test_mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_rcp14_sd + // CHECK-LABEL: test_mm_maskz_rcp14_sd // CHECK: @llvm.x86.avx512.rcp14.sd return _mm_maskz_rcp14_sd(__U, __A, __B); } __m128 test_mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_rcp14_ss + // CHECK-LABEL: test_mm_mask_rcp14_ss // CHECK: 
@llvm.x86.avx512.rcp14.ss return _mm_mask_rcp14_ss(__W, __U, __A, __B); } __m128 test_mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_rcp14_ss + // CHECK-LABEL: test_mm_maskz_rcp14_ss // CHECK: @llvm.x86.avx512.rcp14.ss return _mm_maskz_rcp14_ss(__U, __A, __B); } __m128d test_mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_getexp_sd + // CHECK-LABEL: test_mm_mask_getexp_sd // CHECK: @llvm.x86.avx512.mask.getexp.sd return _mm_mask_getexp_sd(__W, __U, __A, __B); } __m128d test_mm_mask_getexp_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_getexp_round_sd + // CHECK-LABEL: test_mm_mask_getexp_round_sd // CHECK: @llvm.x86.avx512.mask.getexp.sd return _mm_mask_getexp_round_sd(__W, __U, __A, __B, _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_getexp_sd + // CHECK-LABEL: test_mm_maskz_getexp_sd // CHECK: @llvm.x86.avx512.mask.getexp.sd return _mm_maskz_getexp_sd(__U, __A, __B); } __m128d test_mm_maskz_getexp_round_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_getexp_round_sd + // CHECK-LABEL: test_mm_maskz_getexp_round_sd // CHECK: @llvm.x86.avx512.mask.getexp.sd return _mm_maskz_getexp_round_sd(__U, __A, __B, _MM_FROUND_NO_EXC); } __m128 test_mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_getexp_ss + // CHECK-LABEL: test_mm_mask_getexp_ss // CHECK: @llvm.x86.avx512.mask.getexp.ss return _mm_mask_getexp_ss(__W, __U, __A, __B); } __m128 test_mm_mask_getexp_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_getexp_round_ss + // CHECK-LABEL: test_mm_mask_getexp_round_ss // CHECK: @llvm.x86.avx512.mask.getexp.ss return _mm_mask_getexp_round_ss(__W, __U, __A, __B, _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_getexp_ss(__mmask8 __U, 
__m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_getexp_ss + // CHECK-LABEL: test_mm_maskz_getexp_ss // CHECK: @llvm.x86.avx512.mask.getexp.ss return _mm_maskz_getexp_ss(__U, __A, __B); } __m128 test_mm_maskz_getexp_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_getexp_round_ss + // CHECK-LABEL: test_mm_maskz_getexp_round_ss // CHECK: @llvm.x86.avx512.mask.getexp.ss return _mm_maskz_getexp_round_ss(__U, __A, __B, _MM_FROUND_NO_EXC); } __m128d test_mm_mask_getmant_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_getmant_sd + // CHECK-LABEL: test_mm_mask_getmant_sd // CHECK: @llvm.x86.avx512.mask.getmant.sd return _mm_mask_getmant_sd(__W, __U, __A, __B, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m128d test_mm_mask_getmant_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_getmant_round_sd + // CHECK-LABEL: test_mm_mask_getmant_round_sd // CHECK: @llvm.x86.avx512.mask.getmant.sd return _mm_mask_getmant_round_sd(__W, __U, __A, __B, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_getmant_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_getmant_sd + // CHECK-LABEL: test_mm_maskz_getmant_sd // CHECK: @llvm.x86.avx512.mask.getmant.sd return _mm_maskz_getmant_sd(__U, __A, __B, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m128d test_mm_maskz_getmant_round_sd(__mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_maskz_getmant_round_sd + // CHECK-LABEL: test_mm_maskz_getmant_round_sd // CHECK: @llvm.x86.avx512.mask.getmant.sd return _mm_maskz_getmant_round_sd(__U, __A, __B, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); } __m128 test_mm_mask_getmant_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_getmant_ss + // CHECK-LABEL: test_mm_mask_getmant_ss // CHECK: @llvm.x86.avx512.mask.getmant.ss return _mm_mask_getmant_ss(__W, __U, 
__A, __B, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m128 test_mm_mask_getmant_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_getmant_round_ss + // CHECK-LABEL: test_mm_mask_getmant_round_ss // CHECK: @llvm.x86.avx512.mask.getmant.ss return _mm_mask_getmant_round_ss(__W, __U, __A, __B, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); } __m128 test_mm_maskz_getmant_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_getmant_ss + // CHECK-LABEL: test_mm_maskz_getmant_ss // CHECK: @llvm.x86.avx512.mask.getmant.ss return _mm_maskz_getmant_ss(__U, __A, __B, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan); } __m128 test_mm_maskz_getmant_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_maskz_getmant_round_ss + // CHECK-LABEL: test_mm_maskz_getmant_round_ss // CHECK: @llvm.x86.avx512.mask.getmant.ss return _mm_maskz_getmant_round_ss(__U, __A, __B, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); } __m128 test_mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fmadd_ss + // CHECK-LABEL: test_mm_mask_fmadd_ss // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7407,7 +7409,7 @@ __m128 test_mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ } __m128 test_mm_fmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_fmadd_round_ss + // CHECK-LABEL: test_mm_fmadd_round_ss // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7417,7 +7419,7 @@ __m128 test_mm_fmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){ } __m128 test_mm_mask_fmadd_round_ss(__m128 __W, __mmask8 __U, __m128 
__A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fmadd_round_ss + // CHECK-LABEL: test_mm_mask_fmadd_round_ss // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7430,7 +7432,7 @@ __m128 test_mm_mask_fmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 } __m128 test_mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fmadd_ss + // CHECK-LABEL: test_mm_maskz_fmadd_ss // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7443,7 +7445,7 @@ __m128 test_mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ } __m128 test_mm_maskz_fmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fmadd_round_ss + // CHECK-LABEL: test_mm_maskz_fmadd_round_ss // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7456,7 +7458,7 @@ __m128 test_mm_maskz_fmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 } __m128 test_mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmadd_ss + // CHECK-LABEL: test_mm_mask3_fmadd_ss // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[ORIGC:%.+]], i64 0 @@ -7469,7 +7471,7 @@ __m128 test_mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ } __m128 test_mm_mask3_fmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: 
@test_mm_mask3_fmadd_round_ss + // CHECK-LABEL: test_mm_mask3_fmadd_round_ss // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <4 x float> [[ORIGC:%.+]], i64 0 @@ -7482,7 +7484,7 @@ __m128 test_mm_mask3_fmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 } __m128 test_mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fmsub_ss + // CHECK-LABEL: test_mm_mask_fmsub_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7496,7 +7498,7 @@ __m128 test_mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ } __m128 test_mm_fmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_fmsub_round_ss + // CHECK-LABEL: test_mm_fmsub_round_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7507,7 +7509,7 @@ __m128 test_mm_fmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){ } __m128 test_mm_mask_fmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fmsub_round_ss + // CHECK-LABEL: test_mm_mask_fmsub_round_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7521,7 +7523,7 @@ __m128 test_mm_mask_fmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 } __m128 test_mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fmsub_ss + // CHECK-LABEL: test_mm_maskz_fmsub_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x 
float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7535,7 +7537,7 @@ __m128 test_mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ } __m128 test_mm_maskz_fmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fmsub_round_ss + // CHECK-LABEL: test_mm_maskz_fmsub_round_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7549,7 +7551,7 @@ __m128 test_mm_maskz_fmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 } __m128 test_mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmsub_ss + // CHECK-LABEL: test_mm_mask3_fmsub_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> [[ORIGC:%.+]] // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7564,7 +7566,7 @@ __m128 test_mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ } __m128 test_mm_mask3_fmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmsub_round_ss + // CHECK-LABEL: test_mm_mask3_fmsub_round_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> [[ORIGC:%.+]] // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7579,7 +7581,7 @@ __m128 test_mm_mask3_fmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 } __m128 test_mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fnmadd_ss + // CHECK-LABEL: test_mm_mask_fnmadd_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 @@ -7593,7 +7595,7 @@ __m128 
test_mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ } __m128 test_mm_fnmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_fnmadd_round_ss + // CHECK-LABEL: test_mm_fnmadd_round_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 @@ -7604,7 +7606,7 @@ __m128 test_mm_fnmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){ } __m128 test_mm_mask_fnmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fnmadd_round_ss + // CHECK-LABEL: test_mm_mask_fnmadd_round_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 @@ -7618,7 +7620,7 @@ __m128 test_mm_mask_fnmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 } __m128 test_mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fnmadd_ss + // CHECK-LABEL: test_mm_maskz_fnmadd_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 @@ -7632,7 +7634,7 @@ __m128 test_mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) } __m128 test_mm_maskz_fnmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fnmadd_round_ss + // CHECK-LABEL: test_mm_maskz_fnmadd_round_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 @@ -7646,7 +7648,7 @@ __m128 test_mm_maskz_fnmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m12 } __m128 test_mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 
__U){ - // CHECK-LABEL: @test_mm_mask3_fnmadd_ss + // CHECK-LABEL: test_mm_mask3_fnmadd_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 @@ -7660,7 +7662,7 @@ __m128 test_mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) } __m128 test_mm_mask3_fnmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmadd_round_ss + // CHECK-LABEL: test_mm_mask3_fnmadd_round_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <4 x float> [[NEG]], i64 0 @@ -7674,7 +7676,7 @@ __m128 test_mm_mask3_fnmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask } __m128 test_mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fnmsub_ss + // CHECK-LABEL: test_mm_mask_fnmsub_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 @@ -7689,7 +7691,7 @@ __m128 test_mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ } __m128 test_mm_fnmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_fnmsub_round_ss + // CHECK-LABEL: test_mm_fnmsub_round_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 @@ -7701,7 +7703,7 @@ __m128 test_mm_fnmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){ } __m128 test_mm_mask_fnmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ - // CHECK-LABEL: @test_mm_mask_fnmsub_round_ss + // CHECK-LABEL: test_mm_mask_fnmsub_round_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.*}} // CHECK: 
[[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 @@ -7716,7 +7718,7 @@ __m128 test_mm_mask_fnmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 } __m128 test_mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fnmsub_ss + // CHECK-LABEL: test_mm_maskz_fnmsub_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 @@ -7731,7 +7733,7 @@ __m128 test_mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) } __m128 test_mm_maskz_fnmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){ - // CHECK-LABEL: @test_mm_maskz_fnmsub_round_ss + // CHECK-LABEL: test_mm_maskz_fnmsub_round_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[A:%.+]] = extractelement <4 x float> [[ORIGA:%.+]], i64 0 @@ -7746,7 +7748,7 @@ __m128 test_mm_maskz_fnmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m12 } __m128 test_mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmsub_ss + // CHECK-LABEL: test_mm_mask3_fnmsub_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <4 x float> [[ORIGC:%.+]] // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7762,7 +7764,7 @@ __m128 test_mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) } __m128 test_mm_mask3_fnmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmsub_round_ss + // CHECK-LABEL: test_mm_mask3_fnmsub_round_ss // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <4 x float> [[ORIGC:%.+]] // CHECK: [[A:%.+]] = extractelement <4 x float> %{{.*}}, i64 0 @@ -7778,7 +7780,7 @@ __m128 test_mm_mask3_fnmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask } __m128d 
test_mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fmadd_sd + // CHECK-LABEL: test_mm_mask_fmadd_sd // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -7791,7 +7793,7 @@ __m128d test_mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __ } __m128d test_mm_fmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_fmadd_round_sd + // CHECK-LABEL: test_mm_fmadd_round_sd // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -7801,7 +7803,7 @@ __m128d test_mm_fmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){ } __m128d test_mm_mask_fmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fmadd_round_sd + // CHECK-LABEL: test_mm_mask_fmadd_round_sd // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -7814,7 +7816,7 @@ __m128d test_mm_mask_fmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m1 } __m128d test_mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fmadd_sd + // CHECK-LABEL: test_mm_maskz_fmadd_sd // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -7827,7 +7829,7 @@ __m128d test_mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d _ } __m128d test_mm_maskz_fmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, 
__m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fmadd_round_sd + // CHECK-LABEL: test_mm_maskz_fmadd_round_sd // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -7840,7 +7842,7 @@ __m128d test_mm_maskz_fmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m } __m128d test_mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmadd_sd + // CHECK-LABEL: test_mm_mask3_fmadd_sd // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[ORIGC:%.+]], i64 0 @@ -7853,7 +7855,7 @@ __m128d test_mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 _ } __m128d test_mm_mask3_fmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmadd_round_sd + // CHECK-LABEL: test_mm_mask3_fmadd_round_sd // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[C:%.+]] = extractelement <2 x double> [[ORIGC:%.+]], i64 0 @@ -7866,7 +7868,7 @@ __m128d test_mm_mask3_fmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mm } __m128d test_mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fmsub_sd + // CHECK-LABEL: test_mm_mask_fmsub_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -7880,7 +7882,7 @@ __m128d test_mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __ } __m128d test_mm_fmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_fmsub_round_sd + // 
CHECK-LABEL: test_mm_fmsub_round_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -7891,7 +7893,7 @@ __m128d test_mm_fmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){ } __m128d test_mm_mask_fmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fmsub_round_sd + // CHECK-LABEL: test_mm_mask_fmsub_round_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -7905,7 +7907,7 @@ __m128d test_mm_mask_fmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m1 } __m128d test_mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fmsub_sd + // CHECK-LABEL: test_mm_maskz_fmsub_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -7919,7 +7921,7 @@ __m128d test_mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d _ } __m128d test_mm_maskz_fmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fmsub_round_sd + // CHECK-LABEL: test_mm_maskz_fmsub_round_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -7933,7 +7935,7 @@ __m128d test_mm_maskz_fmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m } __m128d test_mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmsub_sd + // CHECK-LABEL: test_mm_mask3_fmsub_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> [[ORIGC:%.+]] // CHECK: [[A:%.+]] = 
extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -7948,7 +7950,7 @@ __m128d test_mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 _ } __m128d test_mm_mask3_fmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fmsub_round_sd + // CHECK-LABEL: test_mm_mask3_fmsub_round_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> [[ORIGC:%.+]] // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -7963,7 +7965,7 @@ __m128d test_mm_mask3_fmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mm } __m128d test_mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fnmadd_sd + // CHECK-LABEL: test_mm_mask_fnmadd_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 @@ -7977,7 +7979,7 @@ __m128d test_mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d _ } __m128d test_mm_fnmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_fnmadd_round_sd + // CHECK-LABEL: test_mm_fnmadd_round_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 @@ -7988,7 +7990,7 @@ __m128d test_mm_fnmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){ } __m128d test_mm_mask_fnmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fnmadd_round_sd + // CHECK-LABEL: test_mm_mask_fnmadd_round_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 @@ -8002,7 
+8004,7 @@ __m128d test_mm_mask_fnmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m } __m128d test_mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fnmadd_sd + // CHECK-LABEL: test_mm_maskz_fnmadd_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 @@ -8016,7 +8018,7 @@ __m128d test_mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d } __m128d test_mm_maskz_fnmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fnmadd_round_sd + // CHECK-LABEL: test_mm_maskz_fnmadd_round_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.+]], i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 @@ -8030,7 +8032,7 @@ __m128d test_mm_maskz_fnmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __ } __m128d test_mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmadd_sd + // CHECK-LABEL: test_mm_mask3_fnmadd_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 @@ -8044,7 +8046,7 @@ __m128d test_mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 } __m128d test_mm_mask3_fnmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmadd_round_sd + // CHECK-LABEL: test_mm_mask3_fnmadd_round_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 // CHECK-NEXT: [[B:%.+]] = extractelement <2 x double> [[NEG]], i64 0 @@ -8058,7 +8060,7 @@ __m128d test_mm_mask3_fnmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __m } __m128d 
test_mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fnmsub_sd + // CHECK-LABEL: test_mm_mask_fnmsub_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.]], i64 0 @@ -8073,7 +8075,7 @@ __m128d test_mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d _ } __m128d test_mm_fnmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_fnmsub_round_sd + // CHECK-LABEL: test_mm_fnmsub_round_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.]], i64 0 @@ -8085,7 +8087,7 @@ __m128d test_mm_fnmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){ } __m128d test_mm_mask_fnmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ - // CHECK-LABEL: @test_mm_mask_fnmsub_round_sd + // CHECK-LABEL: test_mm_mask_fnmsub_round_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.]], i64 0 @@ -8100,7 +8102,7 @@ __m128d test_mm_mask_fnmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m } __m128d test_mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fnmsub_sd + // CHECK-LABEL: test_mm_maskz_fnmsub_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.]], i64 0 @@ -8115,7 +8117,7 @@ __m128d test_mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d } __m128d test_mm_maskz_fnmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){ - // CHECK-LABEL: @test_mm_maskz_fnmsub_round_sd + // CHECK-LABEL: test_mm_maskz_fnmsub_round_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> 
%{{.*}} // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[A:%.+]] = extractelement <2 x double> [[ORIGA:%.]], i64 0 @@ -8130,7 +8132,7 @@ __m128d test_mm_maskz_fnmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __ } __m128d test_mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmsub_sd + // CHECK-LABEL: test_mm_mask3_fnmsub_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <2 x double> [[ORIGC:%.+]] // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -8146,7 +8148,7 @@ __m128d test_mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 } __m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ - // CHECK-LABEL: @test_mm_mask3_fnmsub_round_sd + // CHECK-LABEL: test_mm_mask3_fnmsub_round_sd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <2 x double> [[ORIGC:%.+]] // CHECK: [[A:%.+]] = extractelement <2 x double> %{{.*}}, i64 0 @@ -8162,161 +8164,161 @@ __m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __m } __m512d test_mm512_permutex_pd(__m512d __X) { - // CHECK-LABEL: @test_mm512_permutex_pd + // CHECK-LABEL: test_mm512_permutex_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> return _mm512_permutex_pd(__X, 0); } __m512d test_mm512_mask_permutex_pd(__m512d __W, __mmask8 __U, __m512d __X) { - // CHECK-LABEL: @test_mm512_mask_permutex_pd + // CHECK-LABEL: test_mm512_mask_permutex_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_permutex_pd(__W, __U, __X, 0); } __m512d test_mm512_maskz_permutex_pd(__mmask8 __U, __m512d __X) { - // CHECK-LABEL: @test_mm512_maskz_permutex_pd + // CHECK-LABEL: test_mm512_maskz_permutex_pd // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, 
<8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_permutex_pd(__U, __X, 0); } __m512i test_mm512_permutex_epi64(__m512i __X) { - // CHECK-LABEL: @test_mm512_permutex_epi64 + // CHECK-LABEL: test_mm512_permutex_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <8 x i32> return _mm512_permutex_epi64(__X, 0); } __m512i test_mm512_mask_permutex_epi64(__m512i __W, __mmask8 __M, __m512i __X) { - // CHECK-LABEL: @test_mm512_mask_permutex_epi64 + // CHECK-LABEL: test_mm512_mask_permutex_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_permutex_epi64(__W, __M, __X, 0); } __m512i test_mm512_maskz_permutex_epi64(__mmask8 __M, __m512i __X) { - // CHECK-LABEL: @test_mm512_maskz_permutex_epi64 + // CHECK-LABEL: test_mm512_maskz_permutex_epi64 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_permutex_epi64(__M, __X, 0); } __m512d test_mm512_permutexvar_pd(__m512i __X, __m512d __Y) { - // CHECK-LABEL: @test_mm512_permutexvar_pd + // CHECK-LABEL: test_mm512_permutexvar_pd // CHECK: @llvm.x86.avx512.permvar.df.512 return _mm512_permutexvar_pd(__X, __Y); } __m512d test_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) { - // CHECK-LABEL: @test_mm512_mask_permutexvar_pd + // CHECK-LABEL: test_mm512_mask_permutexvar_pd // CHECK: @llvm.x86.avx512.permvar.df.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_permutexvar_pd(__W, __U, __X, __Y); } __m512d test_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) { - // CHECK-LABEL: @test_mm512_maskz_permutexvar_pd + // CHECK-LABEL: test_mm512_maskz_permutexvar_pd // CHECK: @llvm.x86.avx512.permvar.df.512 // CHECK: select <8 x i1> %{{.*}}, <8 x 
double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_permutexvar_pd(__U, __X, __Y); } __m512i test_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_permutexvar_epi64 + // CHECK-LABEL: test_mm512_maskz_permutexvar_epi64 // CHECK: @llvm.x86.avx512.permvar.di.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_permutexvar_epi64(__M, __X, __Y); } __m512i test_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_permutexvar_epi64 + // CHECK-LABEL: test_mm512_permutexvar_epi64 // CHECK: @llvm.x86.avx512.permvar.di.512 return _mm512_permutexvar_epi64(__X, __Y); } __m512i test_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_permutexvar_epi64 + // CHECK-LABEL: test_mm512_mask_permutexvar_epi64 // CHECK: @llvm.x86.avx512.permvar.di.512 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_permutexvar_epi64(__W, __M, __X, __Y); } __m512 test_mm512_permutexvar_ps(__m512i __X, __m512 __Y) { - // CHECK-LABEL: @test_mm512_permutexvar_ps + // CHECK-LABEL: test_mm512_permutexvar_ps // CHECK: @llvm.x86.avx512.permvar.sf.512 return _mm512_permutexvar_ps(__X, __Y); } __m512 test_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) { - // CHECK-LABEL: @test_mm512_mask_permutexvar_ps + // CHECK-LABEL: test_mm512_mask_permutexvar_ps // CHECK: @llvm.x86.avx512.permvar.sf.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_permutexvar_ps(__W, __U, __X, __Y); } __m512 test_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) { - // CHECK-LABEL: @test_mm512_maskz_permutexvar_ps + // CHECK-LABEL: test_mm512_maskz_permutexvar_ps // CHECK: @llvm.x86.avx512.permvar.sf.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return 
_mm512_maskz_permutexvar_ps(__U, __X, __Y); } __m512i test_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_maskz_permutexvar_epi32 + // CHECK-LABEL: test_mm512_maskz_permutexvar_epi32 // CHECK: @llvm.x86.avx512.permvar.si.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_permutexvar_epi32(__M, __X, __Y); } __m512i test_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_permutexvar_epi32 + // CHECK-LABEL: test_mm512_permutexvar_epi32 // CHECK: @llvm.x86.avx512.permvar.si.512 return _mm512_permutexvar_epi32(__X, __Y); } __m512i test_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y) { - // CHECK-LABEL: @test_mm512_mask_permutexvar_epi32 + // CHECK-LABEL: test_mm512_mask_permutexvar_epi32 // CHECK: @llvm.x86.avx512.permvar.si.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_permutexvar_epi32(__W, __M, __X, __Y); } __mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_mm512_kand + // CHECK-LABEL: test_mm512_kand // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RES:%.*]] = and <16 x i1> [[LHS]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_mm512_kand(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_mm512_kandn + // CHECK-LABEL: test_mm512_kandn // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], 
[[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_mm512_kandn(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_mm512_kor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_mm512_kor + // CHECK-LABEL: test_mm512_kor // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RES:%.*]] = or <16 x i1> [[LHS]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_mm512_kor(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } int test_mm512_kortestc(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { - // CHECK-LABEL: @test_mm512_kortestc + // CHECK-LABEL: test_mm512_kortestc // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]] @@ -8328,7 +8330,7 @@ int test_mm512_kortestc(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { } int test_mm512_kortestz(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { - // CHECK-LABEL: @test_mm512_kortestz + // CHECK-LABEL: test_mm512_kortestz // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]] @@ -8340,7 +8342,7 @@ int test_mm512_kortestz(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { } unsigned char test_kortestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { - // CHECK-LABEL: @test_kortestz_mask16_u8 + // CHECK-LABEL: test_kortestz_mask16_u8 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]] @@ -8353,7 
+8355,7 @@ unsigned char test_kortestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m } unsigned char test_kortestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { - // CHECK-LABEL: @test_kortestc_mask16_u8 + // CHECK-LABEL: test_kortestc_mask16_u8 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]] @@ -8366,7 +8368,7 @@ unsigned char test_kortestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m } unsigned char test_kortest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) { - // CHECK-LABEL: @test_kortest_mask16_u8 + // CHECK-LABEL: test_kortest_mask16_u8 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]] @@ -8386,7 +8388,7 @@ unsigned char test_kortest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m5 } __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_mm512_kunpackb + // CHECK-LABEL: test_mm512_kunpackb // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[LHS2:%.*]] = shufflevector <16 x i1> [[LHS]], <16 x i1> [[LHS]], <8 x i32> @@ -8399,30 +8401,30 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D } __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_mm512_kxnor + // CHECK-LABEL: test_mm512_kxnor // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // CHECK: bitcast <16 x i1> {{.*}} to i16 return 
_mm512_mask_cmpneq_epu32_mask(_mm512_kxnor(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_mm512_kxor + // CHECK-LABEL: test_mm512_kxor // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RES:%.*]] = xor <16 x i1> [[LHS]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_mm512_kxor(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_knot_mask16(__mmask16 a) { - // CHECK-LABEL: @test_knot_mask16 + // CHECK-LABEL: test_knot_mask16 // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], // CHECK: bitcast <16 x i1> [[NOT]] to i16 @@ -8430,80 +8432,80 @@ __mmask16 test_knot_mask16(__mmask16 a) { } __mmask16 test_kand_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_kand_mask16 + // CHECK-LABEL: test_kand_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RES:%.*]] = and <16 x i1> [[LHS]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_kand_mask16(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_kandn_mask16 + // CHECK-LABEL: test_kandn_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]] - // 
CHECK: bitcast <16 x i1> [[RES]] to i16 + // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_kandn_mask16(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_kor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_kor_mask16 + // CHECK-LABEL: test_kor_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RES:%.*]] = or <16 x i1> [[LHS]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_kor_mask16(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_kxnor_mask16 + // CHECK-LABEL: test_kxnor_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_kxnor_mask16(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 test_kxor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { - // CHECK-LABEL: @test_kxor_mask16 + // CHECK-LABEL: test_kxor_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RES:%.*]] = xor <16 x i1> [[LHS]], [[RHS]] - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_kxor_mask16(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), __E, __F); } __mmask16 
test_kshiftli_mask16(__m512i A, __m512i B, __m512i C, __m512i D) { - // CHECK-LABEL: @test_kshiftli_mask16 + // CHECK-LABEL: test_kshiftli_mask16 // CHECK: [[VAL:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RES:%.*]] = shufflevector <16 x i1> zeroinitializer, <16 x i1> [[VAL]], <16 x i32> - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_kshiftli_mask16(_mm512_cmpneq_epu32_mask(A, B), 1), C, D); } __mmask16 test_kshiftri_mask16(__m512i A, __m512i B, __m512i C, __m512i D) { - // CHECK-LABEL: @test_kshiftri_mask16 + // CHECK-LABEL: test_kshiftri_mask16 // CHECK: [[VAL:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[RES:%.*]] = shufflevector <16 x i1> [[VAL]], <16 x i1> zeroinitializer, <16 x i32> - // CHECK: bitcast <16 x i1> [[RES]] to i16 + // CHECK: bitcast <16 x i1> {{.*}} to i16 return _mm512_mask_cmpneq_epu32_mask(_kshiftri_mask16(_mm512_cmpneq_epu32_mask(A, B), 1), C, D); } unsigned int test_cvtmask16_u32(__m512i A, __m512i B) { - // CHECK-LABEL: @test_cvtmask16_u32 + // CHECK-LABEL: test_cvtmask16_u32 // CHECK: bitcast <16 x i1> %{{.*}} to i16 // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: zext i16 %{{.*}} to i32 @@ -8511,363 +8513,363 @@ unsigned int test_cvtmask16_u32(__m512i A, __m512i B) { } __mmask16 test_cvtu32_mask16(__m512i A, __m512i B, unsigned int C) { - // CHECK-LABEL: @test_cvtu32_mask16 + // CHECK-LABEL: test_cvtu32_mask16 // CHECK: trunc i32 %{{.*}} to i16 // CHECK: bitcast i16 %{{.*}} to <16 x i1> return _mm512_mask_cmpneq_epu32_mask(_cvtu32_mask16(C), A, B); } __mmask16 test_load_mask16(__mmask16 *A, __m512i B, __m512i C) { - // CHECK-LABEL: @test_load_mask16 + // CHECK-LABEL: test_load_mask16 // CHECK: [[LOAD:%.*]] = load i16, ptr %{{.*}}{{$}} // CHECK: bitcast i16 [[LOAD]] to <16 x i1> return _mm512_mask_cmpneq_epu32_mask(_load_mask16(A), B, C); } void test_store_mask16(__mmask16 *A, __m512i B, __m512i C) { - // CHECK-LABEL: @test_store_mask16 + 
// CHECK-LABEL: test_store_mask16 // CHECK: bitcast <16 x i1> %{{.*}} to i16 // CHECK: store i16 %{{.*}}, ptr %{{.*}} _store_mask16(A, _mm512_cmpneq_epu32_mask(B, C)); } void test_mm512_stream_si512(__m512i * __P, __m512i __A) { - // CHECK-LABEL: @test_mm512_stream_si512 + // CHECK-LABEL: test_mm512_stream_si512 // CHECK: store <8 x i64> %{{.*}}, ptr %{{.*}}, align 64, !nontemporal [[NONTEMPORAL:![0-9]+]] _mm512_stream_si512(__P, __A); } void test_mm512_stream_si512_2(void * __P, __m512i __A) { - // CHECK-LABEL: @test_mm512_stream_si512 + // CHECK-LABEL: test_mm512_stream_si512 // CHECK: store <8 x i64> %{{.*}}, ptr %{{.*}}, align 64, !nontemporal [[NONTEMPORAL]] _mm512_stream_si512(__P, __A); } __m512i test_mm512_stream_load_si512(void *__P) { - // CHECK-LABEL: @test_mm512_stream_load_si512 + // CHECK-LABEL: test_mm512_stream_load_si512 // CHECK: load <8 x i64>, ptr %{{.*}}, align 64, !nontemporal [[NONTEMPORAL]]{{$}} return _mm512_stream_load_si512(__P); } __m512i test_mm512_stream_load_si512_const(void const *__P) { - // CHECK-LABEL: @test_mm512_stream_load_si512_const + // CHECK-LABEL: test_mm512_stream_load_si512_const // CHECK: load <8 x i64>, ptr %{{.*}}, align 64, !nontemporal [[NONTEMPORAL]]{{$}} return _mm512_stream_load_si512(__P); } void test_mm512_stream_pd(double *__P, __m512d __A) { - // CHECK-LABEL: @test_mm512_stream_pd + // CHECK-LABEL: test_mm512_stream_pd // CHECK: store <8 x double> %{{.*}}, ptr %{{.*}}, align 64, !nontemporal [[NONTEMPORAL]] return _mm512_stream_pd(__P, __A); } void test_mm512_stream_pd_2(void *__P, __m512d __A) { - // CHECK-LABEL: @test_mm512_stream_pd + // CHECK-LABEL: test_mm512_stream_pd // CHECK: store <8 x double> %{{.*}}, ptr %{{.*}}, align 64, !nontemporal [[NONTEMPORAL]] return _mm512_stream_pd(__P, __A); } void test_mm512_stream_ps(float *__P, __m512 __A) { - // CHECK-LABEL: @test_mm512_stream_ps + // CHECK-LABEL: test_mm512_stream_ps // CHECK: store <16 x float> %{{.*}}, ptr %{{.*}}, align 64, !nontemporal 
[[NONTEMPORAL]] _mm512_stream_ps(__P, __A); } void test_mm512_stream_ps_2(void *__P, __m512 __A) { - // CHECK-LABEL: @test_mm512_stream_ps + // CHECK-LABEL: test_mm512_stream_ps // CHECK: store <16 x float> %{{.*}}, ptr %{{.*}}, align 64, !nontemporal [[NONTEMPORAL]] _mm512_stream_ps(__P, __A); } __m512d test_mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_compress_pd + // CHECK-LABEL: test_mm512_mask_compress_pd // CHECK: @llvm.x86.avx512.mask.compress return _mm512_mask_compress_pd(__W, __U, __A); } __m512d test_mm512_maskz_compress_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_compress_pd + // CHECK-LABEL: test_mm512_maskz_compress_pd // CHECK: @llvm.x86.avx512.mask.compress return _mm512_maskz_compress_pd(__U, __A); } __m512i test_mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_compress_epi64 + // CHECK-LABEL: test_mm512_mask_compress_epi64 // CHECK: @llvm.x86.avx512.mask.compress return _mm512_mask_compress_epi64(__W, __U, __A); } __m512i test_mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_compress_epi64 + // CHECK-LABEL: test_mm512_maskz_compress_epi64 // CHECK: @llvm.x86.avx512.mask.compress return _mm512_maskz_compress_epi64(__U, __A); } __m512 test_mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_compress_ps + // CHECK-LABEL: test_mm512_mask_compress_ps // CHECK: @llvm.x86.avx512.mask.compress return _mm512_mask_compress_ps(__W, __U, __A); } __m512 test_mm512_maskz_compress_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_compress_ps + // CHECK-LABEL: test_mm512_maskz_compress_ps // CHECK: @llvm.x86.avx512.mask.compress return _mm512_maskz_compress_ps(__U, __A); } __m512i test_mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_compress_epi32 + // CHECK-LABEL: 
test_mm512_mask_compress_epi32 // CHECK: @llvm.x86.avx512.mask.compress return _mm512_mask_compress_epi32(__W, __U, __A); } __m512i test_mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_compress_epi32 + // CHECK-LABEL: test_mm512_maskz_compress_epi32 // CHECK: @llvm.x86.avx512.mask.compress return _mm512_maskz_compress_epi32(__U, __A); } __mmask8 test_mm_cmp_round_ss_mask(__m128 __X, __m128 __Y) { - // CHECK-LABEL: @test_mm_cmp_round_ss_mask + // CHECK-LABEL: test_mm_cmp_round_ss_mask // CHECK: @llvm.x86.avx512.mask.cmp return _mm_cmp_round_ss_mask(__X, __Y, _CMP_NLT_US, _MM_FROUND_NO_EXC); } __mmask8 test_mm_mask_cmp_round_ss_mask(__mmask8 __M, __m128 __X, __m128 __Y) { - // CHECK-LABEL: @test_mm_mask_cmp_round_ss_mask + // CHECK-LABEL: test_mm_mask_cmp_round_ss_mask // CHECK: @llvm.x86.avx512.mask.cmp return _mm_mask_cmp_round_ss_mask(__M, __X, __Y, _CMP_NLT_US, _MM_FROUND_NO_EXC); } __mmask8 test_mm_cmp_ss_mask(__m128 __X, __m128 __Y) { - // CHECK-LABEL: @test_mm_cmp_ss_mask + // CHECK-LABEL: test_mm_cmp_ss_mask // CHECK: @llvm.x86.avx512.mask.cmp return _mm_cmp_ss_mask(__X, __Y, _CMP_NLT_US); } __mmask8 test_mm_mask_cmp_ss_mask(__mmask8 __M, __m128 __X, __m128 __Y) { - // CHECK-LABEL: @test_mm_mask_cmp_ss_mask + // CHECK-LABEL: test_mm_mask_cmp_ss_mask // CHECK: @llvm.x86.avx512.mask.cmp return _mm_mask_cmp_ss_mask(__M, __X, __Y, _CMP_NLT_US); } __mmask8 test_mm_cmp_round_sd_mask(__m128d __X, __m128d __Y) { - // CHECK-LABEL: @test_mm_cmp_round_sd_mask + // CHECK-LABEL: test_mm_cmp_round_sd_mask // CHECK: @llvm.x86.avx512.mask.cmp return _mm_cmp_round_sd_mask(__X, __Y, _CMP_NLT_US, _MM_FROUND_NO_EXC); } __mmask8 test_mm_mask_cmp_round_sd_mask(__mmask8 __M, __m128d __X, __m128d __Y) { - // CHECK-LABEL: @test_mm_mask_cmp_round_sd_mask + // CHECK-LABEL: test_mm_mask_cmp_round_sd_mask // CHECK: @llvm.x86.avx512.mask.cmp return _mm_mask_cmp_round_sd_mask(__M, __X, __Y, _CMP_NLT_US, _MM_FROUND_NO_EXC); } __mmask8 
test_mm_cmp_sd_mask(__m128d __X, __m128d __Y) { - // CHECK-LABEL: @test_mm_cmp_sd_mask + // CHECK-LABEL: test_mm_cmp_sd_mask // CHECK: @llvm.x86.avx512.mask.cmp return _mm_cmp_sd_mask(__X, __Y, _CMP_NLT_US); } __mmask8 test_mm_mask_cmp_sd_mask(__mmask8 __M, __m128d __X, __m128d __Y) { - // CHECK-LABEL: @test_mm_mask_cmp_sd_mask + // CHECK-LABEL: test_mm_mask_cmp_sd_mask // CHECK: @llvm.x86.avx512.mask.cmp return _mm_mask_cmp_sd_mask(__M, __X, __Y, _CMP_NLT_US); } __m512 test_mm512_movehdup_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_movehdup_ps + // CHECK-LABEL: test_mm512_movehdup_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> return _mm512_movehdup_ps(__A); } __m512 test_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_movehdup_ps + // CHECK-LABEL: test_mm512_mask_movehdup_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_movehdup_ps(__W, __U, __A); } __m512 test_mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_movehdup_ps + // CHECK-LABEL: test_mm512_maskz_movehdup_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_movehdup_ps(__U, __A); } __m512 test_mm512_moveldup_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_moveldup_ps + // CHECK-LABEL: test_mm512_moveldup_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> return _mm512_moveldup_ps(__A); } __m512 test_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_moveldup_ps + // CHECK-LABEL: test_mm512_mask_moveldup_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x 
float> %{{.*}} return _mm512_mask_moveldup_ps(__W, __U, __A); } __m512 test_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_moveldup_ps + // CHECK-LABEL: test_mm512_maskz_moveldup_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_moveldup_ps(__U, __A); } __m512i test_mm512_shuffle_epi32(__m512i __A) { - // CHECK-LABEL: @test_mm512_shuffle_epi32 + // CHECK-LABEL: test_mm512_shuffle_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <16 x i32> return _mm512_shuffle_epi32(__A, 1); } __m512i test_mm512_mask_shuffle_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_shuffle_epi32 + // CHECK-LABEL: test_mm512_mask_shuffle_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shuffle_epi32(__W, __U, __A, 1); } __m512i test_mm512_maskz_shuffle_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_shuffle_epi32 + // CHECK-LABEL: test_mm512_maskz_shuffle_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shuffle_epi32(__U, __A, 1); } __m512d test_mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_expand_pd + // CHECK-LABEL: test_mm512_mask_expand_pd // CHECK: @llvm.x86.avx512.mask.expand return _mm512_mask_expand_pd(__W, __U, __A); } __m512d test_mm512_maskz_expand_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_expand_pd + // CHECK-LABEL: test_mm512_maskz_expand_pd // CHECK: @llvm.x86.avx512.mask.expand return _mm512_maskz_expand_pd(__U, __A); } __m512i test_mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - // 
CHECK-LABEL: @test_mm512_mask_expand_epi64 + // CHECK-LABEL: test_mm512_mask_expand_epi64 // CHECK: @llvm.x86.avx512.mask.expand return _mm512_mask_expand_epi64(__W, __U, __A); } __m512i test_mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_expand_epi64 + // CHECK-LABEL: test_mm512_maskz_expand_epi64 // CHECK: @llvm.x86.avx512.mask.expand return _mm512_maskz_expand_epi64(__U, __A); } __m512i test_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_mask_expandloadu_epi64 + // CHECK-LABEL: test_mm512_mask_expandloadu_epi64 // CHECK: @llvm.masked.expandload.v8i64(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_mask_expandloadu_epi64(__W, __U, __P); } __m512i test_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_maskz_expandloadu_epi64 + // CHECK-LABEL: test_mm512_maskz_expandloadu_epi64 // CHECK: @llvm.masked.expandload.v8i64(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_maskz_expandloadu_epi64(__U, __P); } __m512d test_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_mask_expandloadu_pd + // CHECK-LABEL: test_mm512_mask_expandloadu_pd // CHECK: @llvm.masked.expandload.v8f64(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x double> %{{.*}}) return _mm512_mask_expandloadu_pd(__W, __U, __P); } __m512d test_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_maskz_expandloadu_pd + // CHECK-LABEL: test_mm512_maskz_expandloadu_pd // CHECK: @llvm.masked.expandload.v8f64(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x double> %{{.*}}) return _mm512_maskz_expandloadu_pd(__U, __P); } __m512i test_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_mask_expandloadu_epi32 + // CHECK-LABEL: test_mm512_mask_expandloadu_epi32 // CHECK: @llvm.masked.expandload.v16i32(ptr %{{.*}}, <16 x i1> 
%{{.*}}, <16 x i32> %{{.*}}) return _mm512_mask_expandloadu_epi32(__W, __U, __P); } __m512i test_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_maskz_expandloadu_epi32 + // CHECK-LABEL: test_mm512_maskz_expandloadu_epi32 // CHECK: @llvm.masked.expandload.v16i32(ptr %{{.*}}, <16 x i1> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_maskz_expandloadu_epi32(__U, __P); } __m512 test_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_mask_expandloadu_ps + // CHECK-LABEL: test_mm512_mask_expandloadu_ps // CHECK: @llvm.masked.expandload.v16f32(ptr %{{.*}}, <16 x i1> %{{.*}}, <16 x float> %{{.*}}) return _mm512_mask_expandloadu_ps(__W, __U, __P); } __m512 test_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) { - // CHECK-LABEL: @test_mm512_maskz_expandloadu_ps + // CHECK-LABEL: test_mm512_maskz_expandloadu_ps // CHECK: @llvm.masked.expandload.v16f32(ptr %{{.*}}, <16 x i1> %{{.*}}, <16 x float> %{{.*}}) return _mm512_maskz_expandloadu_ps(__U, __P); } __m512 test_mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_expand_ps + // CHECK-LABEL: test_mm512_mask_expand_ps // CHECK: @llvm.x86.avx512.mask.expand return _mm512_mask_expand_ps(__W, __U, __A); } __m512 test_mm512_maskz_expand_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_expand_ps + // CHECK-LABEL: test_mm512_maskz_expand_ps // CHECK: @llvm.x86.avx512.mask.expand return _mm512_maskz_expand_ps(__U, __A); } __m512i test_mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_expand_epi32 + // CHECK-LABEL: test_mm512_mask_expand_epi32 // CHECK: @llvm.x86.avx512.mask.expand return _mm512_mask_expand_epi32(__W, __U, __A); } __m512i test_mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_expand_epi32 + // CHECK-LABEL: test_mm512_maskz_expand_epi32 // CHECK: 
@llvm.x86.avx512.mask.expand return _mm512_maskz_expand_epi32(__U, __A); } __m512d test_mm512_cvt_roundps_pd(__m256 __A) { - // CHECK-LABEL: @test_mm512_cvt_roundps_pd + // CHECK-LABEL: test_mm512_cvt_roundps_pd // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 return _mm512_cvt_roundps_pd(__A, _MM_FROUND_NO_EXC); } __m512d test_mm512_mask_cvt_roundps_pd(__m512d __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm512_mask_cvt_roundps_pd + // CHECK-LABEL: test_mm512_mask_cvt_roundps_pd // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 return _mm512_mask_cvt_roundps_pd(__W, __U, __A, _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_cvt_roundps_pd(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_pd + // CHECK-LABEL: test_mm512_maskz_cvt_roundps_pd // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 return _mm512_maskz_cvt_roundps_pd(__U, __A, _MM_FROUND_NO_EXC); } __m512d test_mm512_cvtps_pd(__m256 __A) { - // CHECK-LABEL: @test_mm512_cvtps_pd + // CHECK-LABEL: test_mm512_cvtps_pd // CHECK: fpext <8 x float> %{{.*}} to <8 x double> return _mm512_cvtps_pd(__A); } __m512d test_mm512_cvtpslo_pd(__m512 __A) { - // CHECK-LABEL: @test_mm512_cvtpslo_pd + // CHECK-LABEL: test_mm512_cvtpslo_pd // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <8 x i32> // CHECK: fpext <8 x float> %{{.*}} to <8 x double> return _mm512_cvtpslo_pd(__A); } __m512d test_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm512_mask_cvtps_pd + // CHECK-LABEL: test_mm512_mask_cvtps_pd // CHECK: fpext <8 x float> %{{.*}} to <8 x double> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_cvtps_pd(__W, __U, __A); } __m512d test_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvtpslo_pd + // CHECK-LABEL: test_mm512_mask_cvtpslo_pd // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <8 x i32> // CHECK: fpext <8 x float> %{{.*}} to 
<8 x double> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} @@ -8875,118 +8877,118 @@ __m512d test_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) { } __m512d test_mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtps_pd + // CHECK-LABEL: test_mm512_maskz_cvtps_pd // CHECK: fpext <8 x float> %{{.*}} to <8 x double> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_cvtps_pd(__U, __A); } __m512d test_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_mov_pd + // CHECK-LABEL: test_mm512_mask_mov_pd // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_mov_pd(__W, __U, __A); } __m512d test_mm512_maskz_mov_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_mov_pd + // CHECK-LABEL: test_mm512_maskz_mov_pd // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_mov_pd(__U, __A); } __m512 test_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_mov_ps + // CHECK-LABEL: test_mm512_mask_mov_ps // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_mov_ps(__W, __U, __A); } __m512 test_mm512_maskz_mov_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_mov_ps + // CHECK-LABEL: test_mm512_maskz_mov_ps // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_mov_ps(__U, __A); } void test_mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_compressstoreu_pd + // CHECK-LABEL: test_mm512_mask_compressstoreu_pd // CHECK: @llvm.masked.compressstore.v8f64(<8 x double> %{{.*}}, ptr %{{.*}}, <8 x i1> %{{.*}}) return _mm512_mask_compressstoreu_pd(__P, __U, __A); } void test_mm512_mask_compressstoreu_epi64(void *__P, __mmask8 
__U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_compressstoreu_epi64 + // CHECK-LABEL: test_mm512_mask_compressstoreu_epi64 // CHECK: @llvm.masked.compressstore.v8i64(<8 x i64> %{{.*}}, ptr %{{.*}}, <8 x i1> %{{.*}}) return _mm512_mask_compressstoreu_epi64(__P, __U, __A); } void test_mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_compressstoreu_ps + // CHECK-LABEL: test_mm512_mask_compressstoreu_ps // CHECK: @llvm.masked.compressstore.v16f32(<16 x float> %{{.*}}, ptr %{{.*}}, <16 x i1> %{{.*}}) return _mm512_mask_compressstoreu_ps(__P, __U, __A); } void test_mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_compressstoreu_epi32 + // CHECK-LABEL: test_mm512_mask_compressstoreu_epi32 // CHECK: @llvm.masked.compressstore.v16i32(<16 x i32> %{{.*}}, ptr %{{.*}}, <16 x i1> %{{.*}}) return _mm512_mask_compressstoreu_epi32(__P, __U, __A); } __m256i test_mm512_cvtt_roundpd_epu32(__m512d __A) { - // CHECK-LABEL: @test_mm512_cvtt_roundpd_epu32 + // CHECK-LABEL: test_mm512_cvtt_roundpd_epu32 // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 return _mm512_cvtt_roundpd_epu32(__A, _MM_FROUND_NO_EXC); } __m256i test_mm512_mask_cvtt_roundpd_epu32(__m256i __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvtt_roundpd_epu32 + // CHECK-LABEL: test_mm512_mask_cvtt_roundpd_epu32 // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 return _mm512_mask_cvtt_roundpd_epu32(__W, __U, __A, _MM_FROUND_NO_EXC); } __m256i test_mm512_maskz_cvtt_roundpd_epu32(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtt_roundpd_epu32 + // CHECK-LABEL: test_mm512_maskz_cvtt_roundpd_epu32 // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 return _mm512_maskz_cvtt_roundpd_epu32(__U, __A, _MM_FROUND_NO_EXC); } __m256i test_mm512_cvttpd_epu32(__m512d __A) { - // CHECK-LABEL: @test_mm512_cvttpd_epu32 + // CHECK-LABEL: test_mm512_cvttpd_epu32 // CHECK: 
@llvm.x86.avx512.mask.cvttpd2udq.512 return _mm512_cvttpd_epu32(__A); } __m256i test_mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvttpd_epu32 + // CHECK-LABEL: test_mm512_mask_cvttpd_epu32 // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 return _mm512_mask_cvttpd_epu32(__W, __U, __A); } __m256i test_mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_cvttpd_epu32 + // CHECK-LABEL: test_mm512_maskz_cvttpd_epu32 // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 return _mm512_maskz_cvttpd_epu32(__U, __A); } __m512 test_mm512_castpd_ps (__m512d __A) { - // CHECK-LABEL: @test_mm512_castpd_ps + // CHECK-LABEL: test_mm512_castpd_ps // CHECK: bitcast <8 x double> %{{.}} to <16 x float> return _mm512_castpd_ps (__A); } __m512d test_mm512_castps_pd (__m512 __A) { - // CHECK-LABEL: @test_mm512_castps_pd + // CHECK-LABEL: test_mm512_castps_pd // CHECK: bitcast <16 x float> %{{.}} to <8 x double> return _mm512_castps_pd (__A); } __m512i test_mm512_castpd_si512 (__m512d __A) { - // CHECK-LABEL: @test_mm512_castpd_si512 + // CHECK-LABEL: test_mm512_castpd_si512 // CHECK: bitcast <8 x double> %{{.}} to <8 x i64> return _mm512_castpd_si512 (__A); } __m512 test_mm512_castps128_ps512(__m128 __A) { - // CHECK-LABEL: @test_mm512_castps128_ps512 + // CHECK-LABEL: test_mm512_castps128_ps512 // CHECK: [[B:%.*]] = freeze <8 x float> poison // CHECK: store <8 x float> [[B]], ptr [[BA:%.*]] // CHECK: [[A:%.*]] = freeze <4 x float> poison @@ -8997,7 +8999,7 @@ __m512 test_mm512_castps128_ps512(__m128 __A) { } __m512d test_mm512_castpd128_pd512(__m128d __A) { - // CHECK-LABEL: @test_mm512_castpd128_pd512 + // CHECK-LABEL: test_mm512_castpd128_pd512 // CHECK: [[B:%.*]] = freeze <4 x double> poison // CHECK: store <4 x double> [[B]], ptr [[BA:%.*]] // CHECK: [[A:%.*]] = freeze <2 x double> poison @@ -9009,7 +9011,7 @@ __m512d test_mm512_castpd128_pd512(__m128d __A) { __m512i test_mm512_set1_epi8(char d) { - 
// CHECK-LABEL: @test_mm512_set1_epi8 + // CHECK-LABEL: test_mm512_set1_epi8 // CHECK: insertelement <64 x i8> {{.*}}, i32 0 // CHECK: insertelement <64 x i8> {{.*}}, i32 1 // CHECK: insertelement <64 x i8> {{.*}}, i32 2 @@ -9024,7 +9026,7 @@ __m512i test_mm512_set1_epi8(char d) __m512i test_mm512_set1_epi16(short d) { - // CHECK-LABEL: @test_mm512_set1_epi16 + // CHECK-LABEL: test_mm512_set1_epi16 // CHECK: insertelement <32 x i16> {{.*}}, i32 0 // CHECK: insertelement <32 x i16> {{.*}}, i32 1 // CHECK: insertelement <32 x i16> {{.*}}, i32 2 @@ -9039,63 +9041,63 @@ __m512i test_mm512_set1_epi16(short d) __m512i test_mm512_set4_epi32 (int __A, int __B, int __C, int __D) { - // CHECK-LABEL: @test_mm512_set4_epi32 + // CHECK-LABEL: test_mm512_set4_epi32 // CHECK: insertelement <16 x i32> {{.*}}, i32 15 return _mm512_set4_epi32 (__A,__B,__C,__D); } __m512i test_mm512_set4_epi64 (long long __A, long long __B, long long __C, long long __D) { - // CHECK-LABEL: @test_mm512_set4_epi64 + // CHECK-LABEL: test_mm512_set4_epi64 // CHECK: insertelement <8 x i64> {{.*}}, i32 7 return _mm512_set4_epi64 (__A,__B,__C,__D); } __m512d test_mm512_set4_pd (double __A, double __B, double __C, double __D) { - // CHECK-LABEL: @test_mm512_set4_pd + // CHECK-LABEL: test_mm512_set4_pd // CHECK: insertelement <8 x double> {{.*}}, i32 7 return _mm512_set4_pd (__A,__B,__C,__D); } __m512 test_mm512_set4_ps (float __A, float __B, float __C, float __D) { - // CHECK-LABEL: @test_mm512_set4_ps + // CHECK-LABEL: test_mm512_set4_ps // CHECK: insertelement <16 x float> {{.*}}, i32 15 return _mm512_set4_ps (__A,__B,__C,__D); } __m512i test_mm512_setr4_epi32(int e0, int e1, int e2, int e3) { - // CHECK-LABEL: @test_mm512_setr4_epi32 + // CHECK-LABEL: test_mm512_setr4_epi32 // CHECK: insertelement <16 x i32> {{.*}}, i32 15 return _mm512_setr4_epi32(e0, e1, e2, e3); } __m512i test_mm512_setr4_epi64(long long e0, long long e1, long long e2, long long e3) { - // CHECK-LABEL: @test_mm512_setr4_epi64 + // 
CHECK-LABEL: test_mm512_setr4_epi64 // CHECK: insertelement <8 x i64> {{.*}}, i32 7 return _mm512_setr4_epi64(e0, e1, e2, e3); } __m512d test_mm512_setr4_pd(double e0, double e1, double e2, double e3) { - // CHECK-LABEL: @test_mm512_setr4_pd + // CHECK-LABEL: test_mm512_setr4_pd // CHECK: insertelement <8 x double> {{.*}}, i32 7 return _mm512_setr4_pd(e0,e1,e2,e3); } __m512 test_mm512_setr4_ps(float e0, float e1, float e2, float e3) { - // CHECK-LABEL: @test_mm512_setr4_ps + // CHECK-LABEL: test_mm512_setr4_ps // CHECK: insertelement <16 x float> {{.*}}, i32 15 return _mm512_setr4_ps(e0,e1,e2,e3); } __m512d test_mm512_castpd256_pd512(__m256d a) { - // CHECK-LABEL: @test_mm512_castpd256_pd512 + // CHECK-LABEL: test_mm512_castpd256_pd512 // CHECK: [[A:%.*]] = freeze <4 x double> poison // CHECK: shufflevector <4 x double> %{{.}}, <4 x double> [[A]], <8 x i32> return _mm512_castpd256_pd512(a); @@ -9103,26 +9105,26 @@ __m512d test_mm512_castpd256_pd512(__m256d a) __m256d test_mm512_castpd512_pd256 (__m512d __A) { - // CHECK-LABEL: @test_mm512_castpd512_pd256 + // CHECK-LABEL: test_mm512_castpd512_pd256 // CHECK: shufflevector <8 x double> %{{.}}, <8 x double> %{{.}}, <4 x i32> return _mm512_castpd512_pd256 (__A); } __m256 test_mm512_castps512_ps256 (__m512 __A) { - // CHECK-LABEL: @test_mm512_castps512_ps256 + // CHECK-LABEL: test_mm512_castps512_ps256 // CHECK: shufflevector <16 x float> %{{.}}, <16 x float> %{{.}}, <8 x i32> return _mm512_castps512_ps256 (__A); } __m512i test_mm512_castps_si512 (__m512 __A) { - // CHECK-LABEL: @test_mm512_castps_si512 + // CHECK-LABEL: test_mm512_castps_si512 // CHECK: bitcast <16 x float> %{{.}} to <8 x i64> return _mm512_castps_si512 (__A); } __m512i test_mm512_castsi128_si512(__m128i __A) { - // CHECK-LABEL: @test_mm512_castsi128_si512 + // CHECK-LABEL: test_mm512_castsi128_si512 // CHECK: [[B:%.*]] = freeze <4 x i64> poison // CHECK: store <4 x i64> [[B]], ptr [[BA:%.*]] // CHECK: [[A:%.*]] = freeze <2 x i64> poison @@ -9133,7 
+9135,7 @@ __m512i test_mm512_castsi128_si512(__m128i __A) { } __m512i test_mm512_castsi256_si512(__m256i __A) { - // CHECK-LABEL: @test_mm512_castsi256_si512 + // CHECK-LABEL: test_mm512_castsi256_si512 // CHECK: [[A:%.*]] = freeze <4 x i64> poison // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> [[A]], <8 x i32> return _mm512_castsi256_si512(__A); @@ -9141,110 +9143,110 @@ __m512i test_mm512_castsi256_si512(__m256i __A) { __m512 test_mm512_castsi512_ps (__m512i __A) { - // CHECK-LABEL: @test_mm512_castsi512_ps + // CHECK-LABEL: test_mm512_castsi512_ps // CHECK: bitcast <8 x i64> %{{.}} to <16 x float> return _mm512_castsi512_ps (__A); } __m512d test_mm512_castsi512_pd (__m512i __A) { - // CHECK-LABEL: @test_mm512_castsi512_pd + // CHECK-LABEL: test_mm512_castsi512_pd // CHECK: bitcast <8 x i64> %{{.}} to <8 x double> return _mm512_castsi512_pd (__A); } __m128i test_mm512_castsi512_si128 (__m512i __A) { - // CHECK-LABEL: @test_mm512_castsi512_si128 + // CHECK-LABEL: test_mm512_castsi512_si128 // CHECK: shufflevector <8 x i64> %{{.}}, <8 x i64> %{{.}}, <2 x i32> return _mm512_castsi512_si128 (__A); } __m256i test_mm512_castsi512_si256 (__m512i __A) { - // CHECK-LABEL: @test_mm512_castsi512_si256 + // CHECK-LABEL: test_mm512_castsi512_si256 // CHECK: shufflevector <8 x i64> %{{.}}, <8 x i64> %{{.}}, <4 x i32> return _mm512_castsi512_si256 (__A); } __m128 test_mm_cvt_roundsd_ss(__m128 __A, __m128d __B) { - // CHECK-LABEL: @test_mm_cvt_roundsd_ss + // CHECK-LABEL: test_mm_cvt_roundsd_ss // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round return _mm_cvt_roundsd_ss(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_mask_cvt_roundsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_cvt_roundsd_ss + // CHECK-LABEL: test_mm_mask_cvt_roundsd_ss // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round return _mm_mask_cvt_roundsd_ss(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 
test_mm_maskz_cvt_roundsd_ss(__mmask8 __U, __m128 __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_cvt_roundsd_ss + // CHECK-LABEL: test_mm_maskz_cvt_roundsd_ss // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round return _mm_maskz_cvt_roundsd_ss(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } #ifdef __x86_64__ __m128d test_mm_cvt_roundi64_sd(__m128d __A, long long __B) { - // CHECK-LABEL: @test_mm_cvt_roundi64_sd + // CHECK-LABEL: test_mm_cvt_roundi64_sd // CHECK: @llvm.x86.avx512.cvtsi2sd64 return _mm_cvt_roundi64_sd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_cvt_roundsi64_sd(__m128d __A, long long __B) { - // CHECK-LABEL: @test_mm_cvt_roundsi64_sd + // CHECK-LABEL: test_mm_cvt_roundsi64_sd // CHECK: @llvm.x86.avx512.cvtsi2sd64 return _mm_cvt_roundsi64_sd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } #endif __m128 test_mm_cvt_roundsi32_ss(__m128 __A, int __B) { - // CHECK-LABEL: @test_mm_cvt_roundsi32_ss + // CHECK-LABEL: test_mm_cvt_roundsi32_ss // CHECK: @llvm.x86.avx512.cvtsi2ss32 return _mm_cvt_roundsi32_ss(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_cvt_roundi32_ss(__m128 __A, int __B) { - // CHECK-LABEL: @test_mm_cvt_roundi32_ss + // CHECK-LABEL: test_mm_cvt_roundi32_ss // CHECK: @llvm.x86.avx512.cvtsi2ss32 return _mm_cvt_roundi32_ss(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } #ifdef __x86_64__ __m128 test_mm_cvt_roundsi64_ss(__m128 __A, long long __B) { - // CHECK-LABEL: @test_mm_cvt_roundsi64_ss + // CHECK-LABEL: test_mm_cvt_roundsi64_ss // CHECK: @llvm.x86.avx512.cvtsi2ss64 return _mm_cvt_roundsi64_ss(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_cvt_roundi64_ss(__m128 __A, long long __B) { - // CHECK-LABEL: @test_mm_cvt_roundi64_ss + // CHECK-LABEL: test_mm_cvt_roundi64_ss // CHECK: @llvm.x86.avx512.cvtsi2ss64 return _mm_cvt_roundi64_ss(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } #endif __m128d test_mm_cvt_roundss_sd(__m128d __A, __m128 __B) { - // 
CHECK-LABEL: @test_mm_cvt_roundss_sd + // CHECK-LABEL: test_mm_cvt_roundss_sd // CHECK: @llvm.x86.avx512.mask.cvtss2sd.round return _mm_cvt_roundss_sd(__A, __B, _MM_FROUND_NO_EXC); } __m128d test_mm_mask_cvt_roundss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_cvt_roundss_sd + // CHECK-LABEL: test_mm_mask_cvt_roundss_sd // CHECK: @llvm.x86.avx512.mask.cvtss2sd.round return _mm_mask_cvt_roundss_sd(__W, __U, __A, __B, _MM_FROUND_NO_EXC); } __m128d test_mm_maskz_cvt_roundss_sd( __mmask8 __U, __m128d __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_cvt_roundss_sd + // CHECK-LABEL: test_mm_maskz_cvt_roundss_sd // CHECK: @llvm.x86.avx512.mask.cvtss2sd.round return _mm_maskz_cvt_roundss_sd( __U, __A, __B, _MM_FROUND_NO_EXC); } __m128d test_mm_cvtu32_sd(__m128d __A, unsigned __B) { - // CHECK-LABEL: @test_mm_cvtu32_sd + // CHECK-LABEL: test_mm_cvtu32_sd // CHECK: uitofp i32 %{{.*}} to double // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_cvtu32_sd(__A, __B); @@ -9252,13 +9254,13 @@ __m128d test_mm_cvtu32_sd(__m128d __A, unsigned __B) { #ifdef __x86_64__ __m128d test_mm_cvt_roundu64_sd(__m128d __A, unsigned long long __B) { - // CHECK-LABEL: @test_mm_cvt_roundu64_sd + // CHECK-LABEL: test_mm_cvt_roundu64_sd // CHECK: @llvm.x86.avx512.cvtusi642sd return _mm_cvt_roundu64_sd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128d test_mm_cvtu64_sd(__m128d __A, unsigned long long __B) { - // CHECK-LABEL: @test_mm_cvtu64_sd + // CHECK-LABEL: test_mm_cvtu64_sd // CHECK: uitofp i64 %{{.*}} to double // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_cvtu64_sd(__A, __B); @@ -9266,13 +9268,13 @@ __m128d test_mm_cvtu64_sd(__m128d __A, unsigned long long __B) { #endif __m128 test_mm_cvt_roundu32_ss(__m128 __A, unsigned __B) { - // CHECK-LABEL: @test_mm_cvt_roundu32_ss + // CHECK-LABEL: test_mm_cvt_roundu32_ss // CHECK: @llvm.x86.avx512.cvtusi2ss return 
_mm_cvt_roundu32_ss(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_cvtu32_ss(__m128 __A, unsigned __B) { - // CHECK-LABEL: @test_mm_cvtu32_ss + // CHECK-LABEL: test_mm_cvtu32_ss // CHECK: uitofp i32 %{{.*}} to float // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_cvtu32_ss(__A, __B); @@ -9280,13 +9282,13 @@ __m128 test_mm_cvtu32_ss(__m128 __A, unsigned __B) { #ifdef __x86_64__ __m128 test_mm_cvt_roundu64_ss(__m128 __A, unsigned long long __B) { - // CHECK-LABEL: @test_mm_cvt_roundu64_ss + // CHECK-LABEL: test_mm_cvt_roundu64_ss // CHECK: @llvm.x86.avx512.cvtusi642ss return _mm_cvt_roundu64_ss(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } __m128 test_mm_cvtu64_ss(__m128 __A, unsigned long long __B) { - // CHECK-LABEL: @test_mm_cvtu64_ss + // CHECK-LABEL: test_mm_cvtu64_ss // CHECK: uitofp i64 %{{.*}} to float // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_cvtu64_ss(__A, __B); @@ -9295,28 +9297,28 @@ __m128 test_mm_cvtu64_ss(__m128 __A, unsigned long long __B) { __m512i test_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvttps_epu32 + // CHECK-LABEL: test_mm512_mask_cvttps_epu32 // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 return _mm512_mask_cvttps_epu32 (__W,__U,__A); } __m512i test_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvttps_epu32 + // CHECK-LABEL: test_mm512_maskz_cvttps_epu32 // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 return _mm512_maskz_cvttps_epu32 (__U,__A); } __m512 test_mm512_cvtepu32_ps (__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepu32_ps + // CHECK-LABEL: test_mm512_cvtepu32_ps // CHECK: uitofp <16 x i32> %{{.*}} to <16 x float> return _mm512_cvtepu32_ps (__A); } __m512 test_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu32_ps + // CHECK-LABEL: test_mm512_mask_cvtepu32_ps // CHECK: uitofp <16 
x i32> %{{.*}} to <16 x float> // CHECK: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}} return _mm512_mask_cvtepu32_ps (__W,__U,__A); @@ -9324,7 +9326,7 @@ __m512 test_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) __m512 test_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu32_ps + // CHECK-LABEL: test_mm512_maskz_cvtepu32_ps // CHECK: uitofp <16 x i32> %{{.*}} to <16 x float> // CHECK: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}} return _mm512_maskz_cvtepu32_ps (__U,__A); @@ -9332,14 +9334,14 @@ __m512 test_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) __m512d test_mm512_cvtepi32_pd (__m256i __A) { - // CHECK-LABEL: @test_mm512_cvtepi32_pd + // CHECK-LABEL: test_mm512_cvtepi32_pd // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> return _mm512_cvtepi32_pd (__A); } __m512d test_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_pd + // CHECK-LABEL: test_mm512_mask_cvtepi32_pd // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} return _mm512_mask_cvtepi32_pd (__W,__U,__A); @@ -9347,7 +9349,7 @@ __m512d test_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) __m512d test_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi32_pd + // CHECK-LABEL: test_mm512_maskz_cvtepi32_pd // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} return _mm512_maskz_cvtepi32_pd (__U,__A); @@ -9355,7 +9357,7 @@ __m512d test_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) __m512d test_mm512_cvtepi32lo_pd (__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi32lo_pd + // CHECK-LABEL: test_mm512_cvtepi32lo_pd // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <4 x i32> // CHECK: sitofp <8 x i32> %{{.*}} to <8 
x double> return _mm512_cvtepi32lo_pd (__A); @@ -9363,7 +9365,7 @@ __m512d test_mm512_cvtepi32lo_pd (__m512i __A) __m512d test_mm512_mask_cvtepi32lo_pd (__m512d __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32lo_pd + // CHECK-LABEL: test_mm512_mask_cvtepi32lo_pd // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <4 x i32> // CHECK: sitofp <8 x i32> %{{.*}} to <8 x double> // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} @@ -9372,14 +9374,14 @@ __m512d test_mm512_mask_cvtepi32lo_pd (__m512d __W, __mmask8 __U, __m512i __A) __m512 test_mm512_cvtepi32_ps (__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepi32_ps + // CHECK-LABEL: test_mm512_cvtepi32_ps // CHECK: sitofp <16 x i32> %{{.*}} to <16 x float> return _mm512_cvtepi32_ps (__A); } __m512 test_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepi32_ps + // CHECK-LABEL: test_mm512_mask_cvtepi32_ps // CHECK: sitofp <16 x i32> %{{.*}} to <16 x float> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_cvtepi32_ps (__W,__U,__A); @@ -9387,7 +9389,7 @@ __m512 test_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) __m512 test_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepi32_ps + // CHECK-LABEL: test_mm512_maskz_cvtepi32_ps // CHECK: sitofp <16 x i32> %{{.*}} to <16 x float> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_cvtepi32_ps (__U,__A); @@ -9395,14 +9397,14 @@ __m512 test_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) __m512d test_mm512_cvtepu32_pd(__m256i __A) { - // CHECK-LABEL: @test_mm512_cvtepu32_pd + // CHECK-LABEL: test_mm512_cvtepu32_pd // CHECK: uitofp <8 x i32> %{{.*}} to <8 x double> return _mm512_cvtepu32_pd(__A); } __m512d test_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) { - // CHECK-LABEL: 
@test_mm512_mask_cvtepu32_pd + // CHECK-LABEL: test_mm512_mask_cvtepu32_pd // CHECK: uitofp <8 x i32> %{{.*}} to <8 x double> // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} return _mm512_mask_cvtepu32_pd (__W,__U,__A); @@ -9410,7 +9412,7 @@ __m512d test_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) __m512d test_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtepu32_pd + // CHECK-LABEL: test_mm512_maskz_cvtepu32_pd // CHECK: uitofp <8 x i32> %{{.*}} to <8 x double> // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} return _mm512_maskz_cvtepu32_pd (__U,__A); @@ -9418,7 +9420,7 @@ __m512d test_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) __m512d test_mm512_cvtepu32lo_pd (__m512i __A) { - // CHECK-LABEL: @test_mm512_cvtepu32lo_pd + // CHECK-LABEL: test_mm512_cvtepu32lo_pd // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <4 x i32> // CHECK: uitofp <8 x i32> %{{.*}} to <8 x double> return _mm512_cvtepu32lo_pd (__A); @@ -9426,7 +9428,7 @@ __m512d test_mm512_cvtepu32lo_pd (__m512i __A) __m512d test_mm512_mask_cvtepu32lo_pd (__m512d __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtepu32lo_pd + // CHECK-LABEL: test_mm512_mask_cvtepu32lo_pd // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <4 x i32> // CHECK: uitofp <8 x i32> %{{.*}} to <8 x double> // CHECK: select <8 x i1> {{.*}}, <8 x double> {{.*}}, <8 x double> {{.*}} @@ -9435,21 +9437,21 @@ __m512d test_mm512_mask_cvtepu32lo_pd (__m512d __W, __mmask8 __U, __m512i __A) __m256 test_mm512_cvtpd_ps (__m512d __A) { - // CHECK-LABEL: @test_mm512_cvtpd_ps + // CHECK-LABEL: test_mm512_cvtpd_ps // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 return _mm512_cvtpd_ps (__A); } __m256 test_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvtpd_ps + // CHECK-LABEL: test_mm512_mask_cvtpd_ps // CHECK: 
@llvm.x86.avx512.mask.cvtpd2ps.512 return _mm512_mask_cvtpd_ps (__W,__U,__A); } __m512 test_mm512_cvtpd_pslo(__m512d __A) { - // CHECK-LABEL: @test_mm512_cvtpd_pslo + // CHECK-LABEL: test_mm512_cvtpd_pslo // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 // CHECK: zeroinitializer // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> @@ -9457,7 +9459,7 @@ __m512 test_mm512_cvtpd_pslo(__m512d __A) } __m512 test_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvtpd_pslo + // CHECK-LABEL: test_mm512_mask_cvtpd_pslo // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 // CHECK: zeroinitializer // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> @@ -9466,14 +9468,14 @@ __m512 test_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) { __m256 test_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtpd_ps + // CHECK-LABEL: test_mm512_maskz_cvtpd_ps // CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512 return _mm512_maskz_cvtpd_ps (__U,__A); } __m512 test_mm512_cvtph_ps (__m256i __A) { - // CHECK-LABEL: @test_mm512_cvtph_ps + // CHECK-LABEL: test_mm512_cvtph_ps // CHECK: bitcast <4 x i64> %{{.*}} to <16 x i16> // CHECK: bitcast <16 x i16> %{{.*}} to <16 x half> // CHECK: fpext <16 x half> %{{.*}} to <16 x float> @@ -9482,7 +9484,7 @@ __m512 test_mm512_cvtph_ps (__m256i __A) __m512 test_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_mask_cvtph_ps + // CHECK-LABEL: test_mm512_mask_cvtph_ps // CHECK: bitcast <4 x i64> %{{.*}} to <16 x i16> // CHECK: bitcast <16 x i16> %{{.*}} to <16 x half> // CHECK: fpext <16 x half> %{{.*}} to <16 x float> @@ -9492,7 +9494,7 @@ __m512 test_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) __m512 test_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtph_ps + // CHECK-LABEL: test_mm512_maskz_cvtph_ps // CHECK: bitcast <4 x i64> 
%{{.*}} to <16 x i16> // CHECK: bitcast <16 x i16> %{{.*}} to <16 x half> // CHECK: fpext <16 x half> %{{.*}} to <16 x float> @@ -9502,125 +9504,125 @@ __m512 test_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) __m256i test_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvttpd_epi32 + // CHECK-LABEL: test_mm512_mask_cvttpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 return _mm512_mask_cvttpd_epi32 (__W,__U,__A); } __m256i test_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_cvttpd_epi32 + // CHECK-LABEL: test_mm512_maskz_cvttpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvttpd2dq.512 return _mm512_maskz_cvttpd_epi32 (__U,__A); } __m512i test_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvttps_epi32 + // CHECK-LABEL: test_mm512_mask_cvttps_epi32 // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 return _mm512_mask_cvttps_epi32 (__W,__U,__A); } __m512i test_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvttps_epi32 + // CHECK-LABEL: test_mm512_maskz_cvttps_epi32 // CHECK: @llvm.x86.avx512.mask.cvttps2dq.512 return _mm512_maskz_cvttps_epi32 (__U,__A); } __m512i test_mm512_cvtps_epi32 (__m512 __A) { - // CHECK-LABEL: @test_mm512_cvtps_epi32 + // CHECK-LABEL: test_mm512_cvtps_epi32 // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 return _mm512_cvtps_epi32 (__A); } __m512i test_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvtps_epi32 + // CHECK-LABEL: test_mm512_mask_cvtps_epi32 // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 return _mm512_mask_cvtps_epi32 (__W,__U,__A); } __m512i test_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtps_epi32 + // CHECK-LABEL: test_mm512_maskz_cvtps_epi32 // CHECK: @llvm.x86.avx512.mask.cvtps2dq.512 return _mm512_maskz_cvtps_epi32 (__U,__A); } 
__m256i test_mm512_cvtpd_epi32 (__m512d __A) { - // CHECK-LABEL: @test_mm512_cvtpd_epi32 + // CHECK-LABEL: test_mm512_cvtpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 return _mm512_cvtpd_epi32 (__A); } __m256i test_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvtpd_epi32 + // CHECK-LABEL: test_mm512_mask_cvtpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 return _mm512_mask_cvtpd_epi32 (__W,__U,__A); } __m256i test_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtpd_epi32 + // CHECK-LABEL: test_mm512_maskz_cvtpd_epi32 // CHECK: @llvm.x86.avx512.mask.cvtpd2dq.512 return _mm512_maskz_cvtpd_epi32 (__U,__A); } __m256i test_mm512_cvtpd_epu32 (__m512d __A) { - // CHECK-LABEL: @test_mm512_cvtpd_epu32 + // CHECK-LABEL: test_mm512_cvtpd_epu32 // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 return _mm512_cvtpd_epu32 (__A); } __m256i test_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_cvtpd_epu32 + // CHECK-LABEL: test_mm512_mask_cvtpd_epu32 // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 return _mm512_mask_cvtpd_epu32 (__W,__U,__A); } __m256i test_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtpd_epu32 + // CHECK-LABEL: test_mm512_maskz_cvtpd_epu32 // CHECK: @llvm.x86.avx512.mask.cvtpd2udq.512 return _mm512_maskz_cvtpd_epu32 (__U,__A); } __m256i test_mm512_mask_cvtps_ph(__m256i src, __mmask16 k, __m512 a) { - // CHECK-LABEL: @test_mm512_mask_cvtps_ph + // CHECK-LABEL: test_mm512_mask_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 return _mm512_mask_cvtps_ph(src, k, a,_MM_FROUND_TO_ZERO); } __m256i test_mm512_maskz_cvtps_ph (__mmask16 k, __m512 a) { - // CHECK-LABEL: @test_mm512_maskz_cvtps_ph + // CHECK-LABEL: test_mm512_maskz_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 return _mm512_maskz_cvtps_ph( k, a,_MM_FROUND_TO_ZERO); } __m512i 
test_mm512_cvtps_epu32 ( __m512 __A) { - // CHECK-LABEL: @test_mm512_cvtps_epu32 + // CHECK-LABEL: test_mm512_cvtps_epu32 // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 return _mm512_cvtps_epu32(__A); } __m512i test_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_cvtps_epu32 + // CHECK-LABEL: test_mm512_mask_cvtps_epu32 // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 return _mm512_mask_cvtps_epu32( __W, __U, __A); } __m512i test_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_cvtps_epu32 + // CHECK-LABEL: test_mm512_maskz_cvtps_epu32 // CHECK: @llvm.x86.avx512.mask.cvtps2udq.512 return _mm512_maskz_cvtps_epu32( __U, __A); } @@ -9639,7 +9641,7 @@ float test_mm512_cvtss_f32(__m512 A) { __m512d test_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_max_pd + // CHECK-LABEL: test_mm512_mask_max_pd // CHECK: @llvm.x86.avx512.max.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_max_pd (__W,__U,__A,__B); @@ -9647,7 +9649,7 @@ __m512d test_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __m512d test_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_max_pd + // CHECK-LABEL: test_mm512_maskz_max_pd // CHECK: @llvm.x86.avx512.max.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_max_pd (__U,__A,__B); @@ -9655,7 +9657,7 @@ __m512d test_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) __m512 test_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_max_ps + // CHECK-LABEL: test_mm512_mask_max_ps // CHECK: @llvm.x86.avx512.max.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_max_ps (__W,__U,__A,__B); @@ -9663,7 +9665,7 @@ __m512 
test_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B __m512d test_mm512_mask_max_round_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B) { - // CHECK-LABEL: @test_mm512_mask_max_round_pd + // CHECK-LABEL: test_mm512_mask_max_round_pd // CHECK: @llvm.x86.avx512.max.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_max_round_pd(__W,__U,__A,__B,_MM_FROUND_NO_EXC); @@ -9671,7 +9673,7 @@ __m512d test_mm512_mask_max_round_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512 __m512d test_mm512_maskz_max_round_pd(__mmask8 __U,__m512d __A,__m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_max_round_pd + // CHECK-LABEL: test_mm512_maskz_max_round_pd // CHECK: @llvm.x86.avx512.max.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_max_round_pd(__U,__A,__B,_MM_FROUND_NO_EXC); @@ -9679,14 +9681,14 @@ __m512d test_mm512_maskz_max_round_pd(__mmask8 __U,__m512d __A,__m512d __B) __m512d test_mm512_max_round_pd(__m512d __A,__m512d __B) { - // CHECK-LABEL: @test_mm512_max_round_pd + // CHECK-LABEL: test_mm512_max_round_pd // CHECK: @llvm.x86.avx512.max.pd.512 return _mm512_max_round_pd(__A,__B,_MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_max_ps + // CHECK-LABEL: test_mm512_maskz_max_ps // CHECK: @llvm.x86.avx512.max.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_max_ps (__U,__A,__B); @@ -9694,7 +9696,7 @@ __m512 test_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) __m512 test_mm512_mask_max_round_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B) { - // CHECK-LABEL: @test_mm512_mask_max_round_ps + // CHECK-LABEL: test_mm512_mask_max_round_ps // CHECK: @llvm.x86.avx512.max.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return 
_mm512_mask_max_round_ps(__W,__U,__A,__B,_MM_FROUND_NO_EXC); @@ -9702,7 +9704,7 @@ __m512 test_mm512_mask_max_round_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 _ __m512 test_mm512_maskz_max_round_ps(__mmask16 __U,__m512 __A,__m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_max_round_ps + // CHECK-LABEL: test_mm512_maskz_max_round_ps // CHECK: @llvm.x86.avx512.max.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_max_round_ps(__U,__A,__B,_MM_FROUND_NO_EXC); @@ -9710,14 +9712,14 @@ __m512 test_mm512_maskz_max_round_ps(__mmask16 __U,__m512 __A,__m512 __B) __m512 test_mm512_max_round_ps(__m512 __A,__m512 __B) { - // CHECK-LABEL: @test_mm512_max_round_ps + // CHECK-LABEL: test_mm512_max_round_ps // CHECK: @llvm.x86.avx512.max.ps.512 return _mm512_max_round_ps(__A,__B,_MM_FROUND_NO_EXC); } __m512d test_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_mask_min_pd + // CHECK-LABEL: test_mm512_mask_min_pd // CHECK: @llvm.x86.avx512.min.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_min_pd (__W,__U,__A,__B); @@ -9725,14 +9727,14 @@ __m512d test_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __m512d test_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_min_pd + // CHECK-LABEL: test_mm512_maskz_min_pd // CHECK: @llvm.x86.avx512.min.pd.512 return _mm512_maskz_min_pd (__U,__A,__B); } __m512d test_mm512_mask_min_round_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B) { - // CHECK-LABEL: @test_mm512_mask_min_round_pd + // CHECK-LABEL: test_mm512_mask_min_round_pd // CHECK: @llvm.x86.avx512.min.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_min_round_pd(__W,__U,__A,__B,_MM_FROUND_NO_EXC); @@ -9740,7 +9742,7 @@ __m512d test_mm512_mask_min_round_pd(__m512d __W,__mmask8 __U,__m512d 
__A,__m512 __m512d test_mm512_maskz_min_round_pd(__mmask8 __U,__m512d __A,__m512d __B) { - // CHECK-LABEL: @test_mm512_maskz_min_round_pd + // CHECK-LABEL: test_mm512_maskz_min_round_pd // CHECK: @llvm.x86.avx512.min.pd.512 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_min_round_pd(__U,__A,__B,_MM_FROUND_NO_EXC); @@ -9748,14 +9750,14 @@ __m512d test_mm512_maskz_min_round_pd(__mmask8 __U,__m512d __A,__m512d __B) __m512d test_mm512_min_round_pd( __m512d __A,__m512d __B) { - // CHECK-LABEL: @test_mm512_min_round_pd + // CHECK-LABEL: test_mm512_min_round_pd // CHECK: @llvm.x86.avx512.min.pd.512 return _mm512_min_round_pd(__A,__B,_MM_FROUND_NO_EXC); } __m512 test_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_min_ps + // CHECK-LABEL: test_mm512_mask_min_ps // CHECK: @llvm.x86.avx512.min.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_min_ps (__W,__U,__A,__B); @@ -9763,7 +9765,7 @@ __m512 test_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B __m512 test_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_min_ps + // CHECK-LABEL: test_mm512_maskz_min_ps // CHECK: @llvm.x86.avx512.min.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_min_ps (__U,__A,__B); @@ -9771,7 +9773,7 @@ __m512 test_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) __m512 test_mm512_mask_min_round_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B) { - // CHECK-LABEL: @test_mm512_mask_min_round_ps + // CHECK-LABEL: test_mm512_mask_min_round_ps // CHECK: @llvm.x86.avx512.min.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_min_round_ps(__W,__U,__A,__B,_MM_FROUND_NO_EXC); @@ -9779,7 +9781,7 @@ __m512 test_mm512_mask_min_round_ps(__m512 __W,__mmask16 
__U,__m512 __A,__m512 _ __m512 test_mm512_maskz_min_round_ps(__mmask16 __U,__m512 __A,__m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_min_round_ps + // CHECK-LABEL: test_mm512_maskz_min_round_ps // CHECK: @llvm.x86.avx512.min.ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_min_round_ps(__U,__A,__B,_MM_FROUND_NO_EXC); @@ -9787,296 +9789,296 @@ __m512 test_mm512_maskz_min_round_ps(__mmask16 __U,__m512 __A,__m512 __B) __m512 test_mm512_min_round_ps(__m512 __A,__m512 __B) { - // CHECK-LABEL: @test_mm512_min_round_ps + // CHECK-LABEL: test_mm512_min_round_ps // CHECK: @llvm.x86.avx512.min.ps.512 return _mm512_min_round_ps(__A,__B,_MM_FROUND_NO_EXC); } __m512 test_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_floor_ps + // CHECK-LABEL: test_mm512_mask_floor_ps // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 return _mm512_mask_floor_ps (__W,__U,__A); } __m512d test_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_floor_pd + // CHECK-LABEL: test_mm512_mask_floor_pd // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 return _mm512_mask_floor_pd (__W,__U,__A); } __m512 test_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_ceil_ps + // CHECK-LABEL: test_mm512_mask_ceil_ps // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 return _mm512_mask_ceil_ps (__W,__U,__A); } __m512d test_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_ceil_pd + // CHECK-LABEL: test_mm512_mask_ceil_pd // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 return _mm512_mask_ceil_pd (__W,__U,__A); } __m512 test_mm512_mask_roundscale_ps(__m512 __W, __mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_mask_roundscale_ps + // CHECK-LABEL: test_mm512_mask_roundscale_ps // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 return _mm512_mask_roundscale_ps(__W,__U,__A, 1); 
} __m512 test_mm512_maskz_roundscale_ps(__mmask16 __U, __m512 __A) { - // CHECK-LABEL: @test_mm512_maskz_roundscale_ps + // CHECK-LABEL: test_mm512_maskz_roundscale_ps // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 return _mm512_maskz_roundscale_ps(__U,__A, 1); } __m512 test_mm512_mask_roundscale_round_ps(__m512 __A,__mmask16 __U,__m512 __C) { - // CHECK-LABEL: @test_mm512_mask_roundscale_round_ps + // CHECK-LABEL: test_mm512_mask_roundscale_round_ps // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 return _mm512_mask_roundscale_round_ps(__A,__U,__C,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_roundscale_round_ps(__m512 __A,__mmask16 __U) { - // CHECK-LABEL: @test_mm512_maskz_roundscale_round_ps + // CHECK-LABEL: test_mm512_maskz_roundscale_round_ps // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 return _mm512_maskz_roundscale_round_ps(__U,__A,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC); } __m512 test_mm512_roundscale_round_ps(__m512 __A) { - // CHECK-LABEL: @test_mm512_roundscale_round_ps + // CHECK-LABEL: test_mm512_roundscale_round_ps // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512 return _mm512_roundscale_round_ps(__A,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC); } __m512d test_mm512_mask_roundscale_pd(__m512d __W, __mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_mask_roundscale_pd + // CHECK-LABEL: test_mm512_mask_roundscale_pd // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 return _mm512_mask_roundscale_pd(__W,__U,__A, 1); } __m512d test_mm512_maskz_roundscale_pd(__mmask8 __U, __m512d __A) { - // CHECK-LABEL: @test_mm512_maskz_roundscale_pd + // CHECK-LABEL: test_mm512_maskz_roundscale_pd // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 return _mm512_maskz_roundscale_pd(__U,__A, 1); } __m512d test_mm512_mask_roundscale_round_pd(__m512d __A,__mmask8 __U,__m512d __C) { - // CHECK-LABEL: @test_mm512_mask_roundscale_round_pd + // CHECK-LABEL: test_mm512_mask_roundscale_round_pd // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 return 
_mm512_mask_roundscale_round_pd(__A,__U,__C,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_roundscale_round_pd(__m512d __A,__mmask8 __U) { - // CHECK-LABEL: @test_mm512_maskz_roundscale_round_pd + // CHECK-LABEL: test_mm512_maskz_roundscale_round_pd // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 return _mm512_maskz_roundscale_round_pd(__U,__A,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC); } __m512d test_mm512_roundscale_round_pd(__m512d __A) { - // CHECK-LABEL: @test_mm512_roundscale_round_pd + // CHECK-LABEL: test_mm512_roundscale_round_pd // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512 return _mm512_roundscale_round_pd(__A,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC); } __m512i test_mm512_max_epi32 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_max_epi32 - // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK-LABEL: test_mm512_max_epi32 + // CHECK: call <16 x i32> @llvm.smax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_max_epi32 (__A,__B); } __m512i test_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_max_epi32 - // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // CHECK-LABEL: test_mm512_mask_max_epi32 + // CHECK: call <16 x i32> @llvm.smax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> {{.*}} return _mm512_mask_max_epi32 (__W,__M,__A,__B); } __m512i test_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_max_epi32 - // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // CHECK-LABEL: test_mm512_maskz_max_epi32 + // CHECK: call <16 x i32> 
@llvm.smax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> {{.*}} return _mm512_maskz_max_epi32 (__M,__A,__B); } __m512i test_mm512_max_epi64 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_max_epi64 - // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // CHECK-LABEL: test_mm512_max_epi64 + // CHECK: call {{.*}}<8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_max_epi64 (__A,__B); } __m512i test_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_max_epi64 - // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) - // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} + // CHECK-LABEL: test_mm512_mask_max_epi64 + // CHECK: call {{.*}}<8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // CHECK: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64> {{.*}} return _mm512_mask_max_epi64 (__W,__M,__A,__B); } __m512i test_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_max_epi64 - // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) - // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} + // CHECK-LABEL: test_mm512_maskz_max_epi64 + // CHECK: call {{.*}}<8 x i64> @llvm.smax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // CHECK: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64> {{.*}} return _mm512_maskz_max_epi64 (__M,__A,__B); } __m512i test_mm512_max_epu64 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_max_epu64 - // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // CHECK-LABEL: test_mm512_max_epu64 + // CHECK: call {{.*}}<8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_max_epu64 (__A,__B); } 
__m512i test_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_max_epu64 - // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) - // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} + // CHECK-LABEL: test_mm512_mask_max_epu64 + // CHECK: call {{.*}}<8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // CHECK: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64> {{.*}} return _mm512_mask_max_epu64 (__W,__M,__A,__B); } __m512i test_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_max_epu64 - // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) - // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} + // CHECK-LABEL: test_mm512_maskz_max_epu64 + // CHECK: call {{.*}}<8 x i64> @llvm.umax.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // CHECK: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64> {{.*}} return _mm512_maskz_max_epu64 (__M,__A,__B); } __m512i test_mm512_max_epu32 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_max_epu32 - // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK-LABEL: test_mm512_max_epu32 + // CHECK: call <16 x i32> @llvm.umax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_max_epu32 (__A,__B); } __m512i test_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_max_epu32 - // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // CHECK-LABEL: test_mm512_mask_max_epu32 + // CHECK: call <16 x i32> @llvm.umax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> {{.*}} return 
_mm512_mask_max_epu32 (__W,__M,__A,__B); } __m512i test_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_max_epu32 - // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.umax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // CHECK-LABEL: test_mm512_maskz_max_epu32 + // CHECK: call <16 x i32> @llvm.umax.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> {{.*}} return _mm512_maskz_max_epu32 (__M,__A,__B); } __m512i test_mm512_min_epi32 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_min_epi32 - // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK-LABEL: test_mm512_min_epi32 + // CHECK: call <16 x i32> @llvm.smin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_min_epi32 (__A,__B); } __m512i test_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_min_epi32 - // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // CHECK-LABEL: test_mm512_mask_min_epi32 + // CHECK: call <16 x i32> @llvm.smin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> {{.*}} return _mm512_mask_min_epi32 (__W,__M,__A,__B); } __m512i test_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_min_epi32 - // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // CHECK-LABEL: test_mm512_maskz_min_epi32 + // CHECK: call <16 x i32> @llvm.smin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: select <16 x i1> {{.*}}, <16 x i32> 
{{.*}}, <16 x i32> {{.*}} return _mm512_maskz_min_epi32 (__M,__A,__B); } __m512i test_mm512_min_epu32 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_min_epu32 - // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK-LABEL: test_mm512_min_epu32 + // CHECK: call <16 x i32> @llvm.umin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) return _mm512_min_epu32 (__A,__B); } __m512i test_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_min_epu32 - // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // CHECK-LABEL: test_mm512_mask_min_epu32 + // CHECK: call <16 x i32> @llvm.umin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> {{.*}} return _mm512_mask_min_epu32 (__W,__M,__A,__B); } __m512i test_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_min_epu32 - // CHECK: [[RES:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) - // CHECK: select <16 x i1> {{.*}}, <16 x i32> [[RES]], <16 x i32> {{.*}} + // CHECK-LABEL: test_mm512_maskz_min_epu32 + // CHECK: call <16 x i32> @llvm.umin.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // CHECK: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> {{.*}} return _mm512_maskz_min_epu32 (__M,__A,__B); } __m512i test_mm512_min_epi64 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_min_epi64 - // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // CHECK-LABEL: test_mm512_min_epi64 + // CHECK: call {{.*}}<8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_min_epi64 (__A,__B); } __m512i test_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { - 
// CHECK-LABEL: @test_mm512_mask_min_epi64 - // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) - // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} + // CHECK-LABEL: test_mm512_mask_min_epi64 + // CHECK: call {{.*}}<8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // CHECK: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64> {{.*}} return _mm512_mask_min_epi64 (__W,__M,__A,__B); } __m512i test_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_maskz_min_epi64 - // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) - // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} + // CHECK-LABEL: test_mm512_maskz_min_epi64 + // CHECK: call {{.*}}<8 x i64> @llvm.smin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // CHECK: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64> {{.*}} return _mm512_maskz_min_epi64 (__M,__A,__B); } __m512i test_mm512_min_epu64 (__m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_min_epu64 - // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // CHECK-LABEL: test_mm512_min_epu64 + // CHECK: call {{.*}}<8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) return _mm512_min_epu64 (__A,__B); } __m512i test_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { - // CHECK-LABEL: @test_mm512_mask_min_epu64 - // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) - // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} + // CHECK-LABEL: test_mm512_mask_min_epu64 + // CHECK: call {{.*}}<8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // CHECK: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64> {{.*}} return _mm512_mask_min_epu64 (__W,__M,__A,__B); } __m512i test_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { 
- // CHECK-LABEL: @test_mm512_maskz_min_epu64 - // CHECK: [[RES:%.*]] = call <8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) - // CHECK: select <8 x i1> {{.*}}, <8 x i64> [[RES]], <8 x i64> {{.*}} + // CHECK-LABEL: test_mm512_maskz_min_epu64 + // CHECK: call {{.*}}<8 x i64> @llvm.umin.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // CHECK: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64> {{.*}} return _mm512_maskz_min_epu64 (__M,__A,__B); } __m512i test_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) { - // CHECK-LABEL: @test_mm512_mask_set1_epi32 + // CHECK-LABEL: test_mm512_mask_set1_epi32 // CHECK: insertelement <16 x i32> poison, i32 %{{.*}}, i32 0 // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 1 // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 2 @@ -10099,7 +10101,7 @@ __m512i test_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) __m512i test_mm512_maskz_set1_epi32(__mmask16 __M, int __A) { - // CHECK-LABEL: @test_mm512_maskz_set1_epi32 + // CHECK-LABEL: test_mm512_maskz_set1_epi32 // CHECK: insertelement <16 x i32> poison, i32 %{{.*}}, i32 0 // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 1 // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 2 @@ -10132,7 +10134,7 @@ __m512i test_mm512_set_epi8(char e63, char e62, char e61, char e60, char e59, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0) { - //CHECK-LABEL: @test_mm512_set_epi8 + //CHECK-LABEL: test_mm512_set_epi8 //CHECK: load i8, ptr %{{.*}}, align 1 //CHECK: load i8, ptr %{{.*}}, align 1 //CHECK: load i8, ptr %{{.*}}, align 1 @@ -10210,7 +10212,7 @@ __m512i test_mm512_set_epi16(short e31, short e30, short e29, short e28, short e16, short e15, short e14, short e13, short e12, short e11, short e10, short e9, short e8, short e7, short e6, short e5, short e4, short e3, short e2, short e1, short e0) { - //CHECK-LABEL: @test_mm512_set_epi16 + //CHECK-LABEL: test_mm512_set_epi16 
//CHECK: insertelement{{.*}}i32 0 //CHECK: insertelement{{.*}}i32 1 //CHECK: insertelement{{.*}}i32 2 @@ -10253,7 +10255,7 @@ __m512i test_mm512_set_epi32 (int __A, int __B, int __C, int __D, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) { - //CHECK-LABEL: @test_mm512_set_epi32 + //CHECK-LABEL: test_mm512_set_epi32 //CHECK: insertelement{{.*}}i32 0 //CHECK: insertelement{{.*}}i32 1 //CHECK: insertelement{{.*}}i32 2 @@ -10279,7 +10281,7 @@ __m512i test_mm512_setr_epi32 (int __A, int __B, int __C, int __D, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) { - //CHECK-LABEL: @test_mm512_setr_epi32 + //CHECK-LABEL: test_mm512_setr_epi32 //CHECK: load{{.*}}%{{.*}}, align 4 //CHECK: load{{.*}}%{{.*}}, align 4 //CHECK: load{{.*}}%{{.*}}, align 4 @@ -10318,7 +10320,7 @@ __m512i test_mm512_setr_epi32 (int __A, int __B, int __C, int __D, __m512i test_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) { - // CHECK-LABEL: @test_mm512_mask_set1_epi64 + // CHECK-LABEL: test_mm512_mask_set1_epi64 // CHECK: insertelement <8 x i64> poison, i64 %{{.*}}, i32 0 // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 1 // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 2 @@ -10333,7 +10335,7 @@ __m512i test_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) __m512i test_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A) { - // CHECK-LABEL: @test_mm512_maskz_set1_epi64 + // CHECK-LABEL: test_mm512_maskz_set1_epi64 // CHECK: insertelement <8 x i64> poison, i64 %{{.*}}, i32 0 // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 1 // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 2 @@ -10351,7 +10353,7 @@ __m512i test_mm512_set_epi64 (long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H) { - //CHECK-LABEL: @test_mm512_set_epi64 + //CHECK-LABEL: test_mm512_set_epi64 //CHECK: insertelement{{.*}}i32 0 //CHECK: 
insertelement{{.*}}i32 1 //CHECK: insertelement{{.*}}i32 2 @@ -10367,7 +10369,7 @@ __m512i test_mm512_setr_epi64 (long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H) { - //CHECK-LABEL: @test_mm512_setr_epi64 + //CHECK-LABEL: test_mm512_setr_epi64 //CHECK: load{{.*}}%{{.*}}, align 8 //CHECK: load{{.*}}%{{.*}}, align 8 //CHECK: load{{.*}}%{{.*}}, align 8 @@ -10390,7 +10392,7 @@ __m512i test_mm512_setr_epi64 (long long __A, long long __B, long long __C, __m512d test_mm512_set_pd (double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H) { - //CHECK-LABEL: @test_mm512_set_pd + //CHECK-LABEL: test_mm512_set_pd //CHECK: insertelement{{.*}}i32 0 //CHECK: insertelement{{.*}}i32 1 //CHECK: insertelement{{.*}}i32 2 @@ -10405,7 +10407,7 @@ __m512d test_mm512_set_pd (double __A, double __B, double __C, double __D, __m512d test_mm512_setr_pd (double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H) { - //CHECK-LABEL: @test_mm512_setr_pd + //CHECK-LABEL: test_mm512_setr_pd //CHECK: load{{.*}}%{{.*}}, align 8 //CHECK: load{{.*}}%{{.*}}, align 8 //CHECK: load{{.*}}%{{.*}}, align 8 @@ -10430,7 +10432,7 @@ __m512 test_mm512_set_ps (float __A, float __B, float __C, float __D, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P) { - //CHECK-LABEL: @test_mm512_set_ps + //CHECK-LABEL: test_mm512_set_ps //CHECK: insertelement{{.*}}i32 0 //CHECK: insertelement{{.*}}i32 1 //CHECK: insertelement{{.*}}i32 2 @@ -10453,23 +10455,23 @@ __m512 test_mm512_set_ps (float __A, float __B, float __C, float __D, __m512i test_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_abs_epi64 - // CHECK: [[ABS:%.*]] = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %{{.*}}, i1 false) + // CHECK-LABEL: test_mm512_mask_abs_epi64 + // CHECK: [[ABS:%.*]] = call {{.*}}<8 x i64> @llvm.abs.v8i64(<8 x 
i64> %{{.*}}, i1 false) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[ABS]], <8 x i64> %{{.*}} return _mm512_mask_abs_epi64 (__W,__U,__A); } __m512i test_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_abs_epi64 - // CHECK: [[ABS:%.*]] = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %{{.*}}, i1 false) + // CHECK-LABEL: test_mm512_maskz_abs_epi64 + // CHECK: [[ABS:%.*]] = call {{.*}}<8 x i64> @llvm.abs.v8i64(<8 x i64> %{{.*}}, i1 false) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> [[ABS]], <8 x i64> %{{.*}} return _mm512_maskz_abs_epi64 (__U,__A); } __m512i test_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_abs_epi32 + // CHECK-LABEL: test_mm512_mask_abs_epi32 // CHECK: [[ABS:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %{{.*}}, i1 false) // CHECK: [[TMP:%.*]] = bitcast <16 x i32> [[ABS]] to <8 x i64> // CHECK: [[ABS:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> @@ -10479,7 +10481,7 @@ __m512i test_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) __m512i test_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_abs_epi32 + // CHECK-LABEL: test_mm512_maskz_abs_epi32 // CHECK: [[ABS:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %{{.*}}, i1 false) // CHECK: [[TMP:%.*]] = bitcast <16 x i32> [[ABS]] to <8 x i64> // CHECK: [[ABS:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> @@ -10492,7 +10494,7 @@ __m512 test_mm512_setr_ps (float __A, float __B, float __C, float __D, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P) { - //CHECK-LABEL: @test_mm512_setr_ps + //CHECK-LABEL: test_mm512_setr_ps //CHECK: load{{.*}}%{{.*}}, align 4 //CHECK: load{{.*}}%{{.*}}, align 4 //CHECK: load{{.*}}%{{.*}}, align 4 @@ -10531,14 +10533,14 @@ __m512 test_mm512_setr_ps (float __A, float __B, float __C, float __D, int test_mm_cvtss_i32(__m128 A) { // CHECK-LABEL: test_mm_cvtss_i32 - // CHECK: call i32 
@llvm.x86.sse.cvtss2si(<4 x float> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}}) return _mm_cvtss_i32(A); } #ifdef __x86_64__ long long test_mm_cvtss_i64(__m128 A) { // CHECK-LABEL: test_mm_cvtss_i64 - // CHECK: call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}}) + // CHECK: call {{.*}}i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}}) return _mm_cvtss_i64(A); } #endif @@ -10577,38 +10579,38 @@ __m128 test_mm_cvti64_ss(__m128 A, long long B) { int test_mm_cvtsd_i32(__m128d A) { // CHECK-LABEL: test_mm_cvtsd_i32 - // CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}}) + // CHECK: call {{.*}}i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}}) return _mm_cvtsd_i32(A); } #ifdef __x86_64__ long long test_mm_cvtsd_i64(__m128d A) { // CHECK-LABEL: test_mm_cvtsd_i64 - // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}}) + // CHECK: call {{.*}}i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}}) return _mm_cvtsd_i64(A); } #endif __m128d test_mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_cvtss_sd + // CHECK-LABEL: test_mm_mask_cvtss_sd // CHECK: @llvm.x86.avx512.mask.cvtss2sd.round return _mm_mask_cvtss_sd(__W, __U, __A, __B); } __m128d test_mm_maskz_cvtss_sd( __mmask8 __U, __m128d __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_cvtss_sd + // CHECK-LABEL: test_mm_maskz_cvtss_sd // CHECK: @llvm.x86.avx512.mask.cvtss2sd.round return _mm_maskz_cvtss_sd( __U, __A, __B); } __m128 test_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_cvtsd_ss + // CHECK-LABEL: test_mm_mask_cvtsd_ss // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round return _mm_mask_cvtsd_ss(__W, __U, __A, __B); } __m128 test_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_cvtsd_ss + // CHECK-LABEL: test_mm_maskz_cvtsd_ss // CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round return _mm_maskz_cvtsd_ss(__U, __A, __B); 
} @@ -10616,35 +10618,35 @@ __m128 test_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) { __m512i test_mm512_setzero_epi32(void) { - // CHECK-LABEL: @test_mm512_setzero_epi32 + // CHECK-LABEL: test_mm512_setzero_epi32 // CHECK: zeroinitializer return _mm512_setzero_epi32(); } __m512 test_mm512_setzero(void) { - // CHECK-LABEL: @test_mm512_setzero + // CHECK-LABEL: test_mm512_setzero // CHECK: zeroinitializer return _mm512_setzero(); } __m512i test_mm512_setzero_si512(void) { - // CHECK-LABEL: @test_mm512_setzero_si512 + // CHECK-LABEL: test_mm512_setzero_si512 // CHECK: zeroinitializer return _mm512_setzero_si512(); } __m512 test_mm512_setzero_ps(void) { - // CHECK-LABEL: @test_mm512_setzero_ps + // CHECK-LABEL: test_mm512_setzero_ps // CHECK: zeroinitializer return _mm512_setzero_ps(); } __m512d test_mm512_setzero_pd(void) { - // CHECK-LABEL: @test_mm512_setzero_pd + // CHECK-LABEL: test_mm512_setzero_pd // CHECK: zeroinitializer return _mm512_setzero_pd(); } @@ -10665,7 +10667,7 @@ int test_mm512_mask2int(__mmask16 __a) __m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_move_ss + // CHECK-LABEL: test_mm_mask_move_ss // CHECK: [[EXT:%.*]] = extractelement <4 x float> %{{.*}}, i32 0 // CHECK: insertelement <4 x float> %{{.*}}, float [[EXT]], i32 0 // CHECK: [[A:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0 @@ -10679,7 +10681,7 @@ __m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) __m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_move_ss + // CHECK-LABEL: test_mm_maskz_move_ss // CHECK: [[EXT:%.*]] = extractelement <4 x float> %{{.*}}, i32 0 // CHECK: insertelement <4 x float> %{{.*}}, float [[EXT]], i32 0 // CHECK: [[A:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0 @@ -10693,7 +10695,7 @@ __m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) __m128d test_mm_mask_move_sd (__m128d __W, 
__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_mask_move_sd + // CHECK-LABEL: test_mm_mask_move_sd // CHECK: [[EXT:%.*]] = extractelement <2 x double> %{{.*}}, i32 0 // CHECK: insertelement <2 x double> %{{.*}}, double [[EXT]], i32 0 // CHECK: [[A:%.*]] = extractelement <2 x double> [[VEC:%.*]], i64 0 @@ -10707,7 +10709,7 @@ __m128d test_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __ __m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) { - // CHECK-LABEL: @test_mm_maskz_move_sd + // CHECK-LABEL: test_mm_maskz_move_sd // CHECK: [[EXT:%.*]] = extractelement <2 x double> %{{.*}}, i32 0 // CHECK: insertelement <2 x double> %{{.*}}, double [[EXT]], i32 0 // CHECK: [[A:%.*]] = extractelement <2 x double> [[VEC:%.*]], i64 0 @@ -10721,54 +10723,54 @@ __m128d test_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) void test_mm_mask_store_ss(float * __P, __mmask8 __U, __m128 __A) { - // CHECK-LABEL: @test_mm_mask_store_ss + // CHECK-LABEL: test_mm_mask_store_ss // CHECK: call void @llvm.masked.store.v4f32.p0(<4 x float> %{{.*}}, ptr %{{.*}}, i32 1, <4 x i1> %{{.*}}) _mm_mask_store_ss(__P, __U, __A); } void test_mm_mask_store_sd(double * __P, __mmask8 __U, __m128d __A) { - // CHECK-LABEL: @test_mm_mask_store_sd + // CHECK-LABEL: test_mm_mask_store_sd // CHECK: call void @llvm.masked.store.v2f64.p0(<2 x double> %{{.*}}, ptr %{{.*}}, i32 1, <2 x i1> %{{.*}}) _mm_mask_store_sd(__P, __U, __A); } __m128 test_mm_mask_load_ss(__m128 __A, __mmask8 __U, const float* __W) { - // CHECK-LABEL: @test_mm_mask_load_ss - // CHECK: call <4 x float> @llvm.masked.load.v4f32.p0(ptr %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x float> %{{.*}}) + // CHECK-LABEL: test_mm_mask_load_ss + // CHECK: call {{.*}}<4 x float> @llvm.masked.load.v4f32.p0(ptr %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x float> %{{.*}}) return _mm_mask_load_ss(__A, __U, __W); } __m128 test_mm_maskz_load_ss (__mmask8 __U, const float * __W) { - // CHECK-LABEL: 
@test_mm_maskz_load_ss - // CHECK: call <4 x float> @llvm.masked.load.v4f32.p0(ptr %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x float> %{{.*}}) + // CHECK-LABEL: test_mm_maskz_load_ss + // CHECK: call {{.*}}<4 x float> @llvm.masked.load.v4f32.p0(ptr %{{.*}}, i32 1, <4 x i1> %{{.*}}, <4 x float> %{{.*}}) return _mm_maskz_load_ss (__U, __W); } __m128d test_mm_mask_load_sd (__m128d __A, __mmask8 __U, const double * __W) { - // CHECK-LABEL: @test_mm_mask_load_sd - // CHECK: call <2 x double> @llvm.masked.load.v2f64.p0(ptr %{{.*}}, i32 1, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) + // CHECK-LABEL: test_mm_mask_load_sd + // CHECK: call {{.*}}<2 x double> @llvm.masked.load.v2f64.p0(ptr %{{.*}}, i32 1, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) return _mm_mask_load_sd (__A, __U, __W); } __m128d test_mm_maskz_load_sd (__mmask8 __U, const double * __W) { - // CHECK-LABEL: @test_mm_maskz_load_sd - // CHECK: call <2 x double> @llvm.masked.load.v2f64.p0(ptr %{{.*}}, i32 1, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) + // CHECK-LABEL: test_mm_maskz_load_sd + // CHECK: call {{.*}}<2 x double> @llvm.masked.load.v2f64.p0(ptr %{{.*}}, i32 1, <2 x i1> %{{.*}}, <2 x double> %{{.*}}) return _mm_maskz_load_sd (__U, __W); } __m512d test_mm512_abs_pd(__m512d a){ - // CHECK-LABEL: @test_mm512_abs_pd + // CHECK-LABEL: test_mm512_abs_pd // CHECK: and <8 x i64> return _mm512_abs_pd(a); } __m512d test_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A){ - // CHECK-LABEL: @test_mm512_mask_abs_pd + // CHECK-LABEL: test_mm512_mask_abs_pd // CHECK: %[[AND_RES:.*]] = and <8 x i64> // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %[[MASK]], <8 x i64> %[[AND_RES]], <8 x i64> %{{.*}} @@ -10776,13 +10778,13 @@ __m512d test_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A){ } __m512 test_mm512_abs_ps(__m512 a){ - // CHECK-LABEL: @test_mm512_abs_ps + // CHECK-LABEL: test_mm512_abs_ps // CHECK: and <16 x i32> return _mm512_abs_ps(a); } __m512 test_mm512_mask_abs_ps(__m512 
__W, __mmask16 __U, __m512 __A){ - // CHECK-LABEL: @test_mm512_mask_abs_ps + // CHECK-LABEL: test_mm512_mask_abs_ps // CHECK: and <16 x i32> // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %[[MASK]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}} @@ -10832,49 +10834,71 @@ __m512i test_mm512_zextsi256_si512(__m256i A) { } __m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i32logather_pd + // CHECK-LABEL: test_mm512_i32logather_pd // CHECK: @llvm.x86.avx512.mask.gather.dpd.512 return _mm512_i32logather_pd(__index, __addr, 2); } __m512d test_mm512_mask_i32logather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i32logather_pd + // CHECK-LABEL: test_mm512_mask_i32logather_pd // CHECK: @llvm.x86.avx512.mask.gather.dpd.512 return _mm512_mask_i32logather_pd(__v1_old, __mask, __index, __addr, 2); } void test_mm512_i32loscatter_pd(void *__addr, __m512i __index, __m512d __v1) { - // CHECK-LABEL: @test_mm512_i32loscatter_pd + // CHECK-LABEL: test_mm512_i32loscatter_pd // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512 return _mm512_i32loscatter_pd(__addr, __index, __v1, 2); } void test_mm512_mask_i32loscatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) { - // CHECK-LABEL: @test_mm512_mask_i32loscatter_pd + // CHECK-LABEL: test_mm512_mask_i32loscatter_pd // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512 return _mm512_mask_i32loscatter_pd(__addr, __mask, __index, __v1, 2); } __m512i test_mm512_i32logather_epi64(__m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_i32logather_epi64 + // CHECK-LABEL: test_mm512_i32logather_epi64 // CHECK: @llvm.x86.avx512.mask.gather.dpq.512 return _mm512_i32logather_epi64(__index, __addr, 2); } __m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) { - // CHECK-LABEL: @test_mm512_mask_i32logather_epi64 + // 
CHECK-LABEL: test_mm512_mask_i32logather_epi64 // CHECK: @llvm.x86.avx512.mask.gather.dpq.512 return _mm512_mask_i32logather_epi64(__v1_old, __mask, __index, __addr, 2); } void test_mm512_i32loscatter_epi64(void *__addr, __m512i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_i32loscatter_epi64 + // CHECK-LABEL: test_mm512_i32loscatter_epi64 // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512 _mm512_i32loscatter_epi64(__addr, __index, __v1, 2); } void test_mm512_mask_i32loscatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) { - // CHECK-LABEL: @test_mm512_mask_i32loscatter_epi64 + // CHECK-LABEL: test_mm512_mask_i32loscatter_epi64 // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512 _mm512_mask_i32loscatter_epi64(__addr, __mask, __index, __v1, 2); } + +// Test constexpr handling. +#if defined(__cplusplus) && (__cplusplus >= 201103L) + +void test_constexpr() { + constexpr __m512 v_mm512_setzero = _mm512_setzero(); + static_assert(v_mm512_setzero[0] == +0.0f && v_mm512_setzero[1] == +0.0f && v_mm512_setzero[2] == +0.0f && v_mm512_setzero[3] == +0.0f && v_mm512_setzero[4] == +0.0f && v_mm512_setzero[5] == +0.0f && v_mm512_setzero[6] == +0.0f && v_mm512_setzero[7] == +0.0f && v_mm512_setzero[8] == +0.0f && v_mm512_setzero[9] == +0.0f && v_mm512_setzero[10] == +0.0f && v_mm512_setzero[11] == +0.0f && v_mm512_setzero[12] == +0.0f && v_mm512_setzero[13] == +0.0f && v_mm512_setzero[14] == +0.0f && v_mm512_setzero[15] == +0.0f); + + constexpr __m512 v_mm512_setzero_ps = _mm512_setzero_ps(); + static_assert(v_mm512_setzero_ps[0] == +0.0f && v_mm512_setzero_ps[1] == +0.0f && v_mm512_setzero_ps[2] == +0.0f && v_mm512_setzero_ps[3] == +0.0f && v_mm512_setzero_ps[4] == +0.0f && v_mm512_setzero_ps[5] == +0.0f && v_mm512_setzero_ps[6] == +0.0f && v_mm512_setzero_ps[7] == +0.0f && v_mm512_setzero_ps[8] == +0.0f && v_mm512_setzero_ps[9] == +0.0f && v_mm512_setzero_ps[10] == +0.0f && v_mm512_setzero_ps[11] == +0.0f && v_mm512_setzero_ps[12] == +0.0f && 
v_mm512_setzero_ps[13] == +0.0f && v_mm512_setzero_ps[14] == +0.0f && v_mm512_setzero_ps[15] == +0.0f); + + constexpr __m512d v_mm512_setzero_pd = _mm512_setzero_pd(); + static_assert(v_mm512_setzero_pd[0] == +0.0 && v_mm512_setzero_pd[1] == +0.0 && v_mm512_setzero_pd[2] == +0.0 && v_mm512_setzero_pd[3] == +0.0 && v_mm512_setzero_pd[4] == +0.0 && v_mm512_setzero_pd[5] == +0.0 && v_mm512_setzero_pd[6] == +0.0 && v_mm512_setzero_pd[7] == +0.0); + + constexpr __m512i v_mm512_setzero_si512 = _mm512_setzero_si512(); + static_assert(v_mm512_setzero_si512[0] == 0x0000000000000000ULL && v_mm512_setzero_si512[1] == 0x0000000000000000ULL && v_mm512_setzero_si512[2] == 0x0000000000000000ULL && v_mm512_setzero_si512[3] == 0x0000000000000000ULL && v_mm512_setzero_si512[4] == 0x0000000000000000ULL && v_mm512_setzero_si512[5] == 0x0000000000000000ULL && v_mm512_setzero_si512[6] == 0x0000000000000000ULL && v_mm512_setzero_si512[7] == 0x0000000000000000ULL); + + constexpr __m512i v_mm512_setzero_epi32 = _mm512_setzero_epi32(); + static_assert(v_mm512_setzero_epi32[0] == 0x0000000000000000ULL && v_mm512_setzero_epi32[1] == 0x0000000000000000ULL && v_mm512_setzero_epi32[2] == 0x0000000000000000ULL && v_mm512_setzero_epi32[3] == 0x0000000000000000ULL && v_mm512_setzero_epi32[4] == 0x0000000000000000ULL && v_mm512_setzero_epi32[5] == 0x0000000000000000ULL && v_mm512_setzero_epi32[6] == 0x0000000000000000ULL && v_mm512_setzero_epi32[7] == 0x0000000000000000ULL); +} + +#endif diff --git a/clang/test/CodeGen/X86/f16c-builtins-constrained.c b/clang/test/CodeGen/X86/f16c-builtins-constrained.c index d616fa56fb71a7d..bbd4d8f83b53a60 100644 --- a/clang/test/CodeGen/X86/f16c-builtins-constrained.c +++ b/clang/test/CodeGen/X86/f16c-builtins-constrained.c @@ -1,4 +1,7 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +f16c -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +f16c -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s #include @@ -36,13 +39,13 @@ unsigned short test_cvtss_sh(float a) { __m128 test_mm_cvtph_ps(__m128i a) { // CHECK-LABEL: test_mm_cvtph_ps // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> - // CHECK: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict") + // CHECK: call {{.*}}<4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict") return _mm_cvtph_ps(a); } __m256 test_mm256_cvtph_ps(__m128i a) { // CHECK-LABEL: test_mm256_cvtph_ps - // CHECK: call <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(<8 x half> %{{.*}}, metadata !"fpexcept.strict") + // CHECK: call {{.*}}<8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(<8 x half> %{{.*}}, metadata !"fpexcept.strict") return _mm256_cvtph_ps(a); } diff --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c index 7f2fda654649e44..3c6d64c225b32bd 100644 --- a/clang/test/CodeGen/X86/f16c-builtins.c +++ b/clang/test/CodeGen/X86/f16c-builtins.c @@ -1,4 +1,7 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -o - -Wall -Werror | 
FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +f16c -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +f16c -emit-llvm -o - -Wall -Werror | FileCheck %s #include diff --git a/clang/test/CodeGen/X86/fma-builtins-constrained.c b/clang/test/CodeGen/X86/fma-builtins-constrained.c index 74177ef7d1c9d63..019dde2e0251495 100644 --- a/clang/test/CodeGen/X86/fma-builtins-constrained.c +++ b/clang/test/CodeGen/X86/fma-builtins-constrained.c @@ -1,8 +1,12 @@ // REQUIRES: x86-registered-target -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,UNCONSTRAINED -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -ffp-exception-behavior=maytrap -DSTRICT=1 -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,CONSTRAINED -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,UNCONSTRAINED +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -ffp-exception-behavior=maytrap -DSTRICT=1 -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,CONSTRAINED +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 
-S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,UNCONSTRAINED +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -ffp-exception-behavior=maytrap -DSTRICT=1 -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,CONSTRAINED +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM #ifdef STRICT // Test that the constrained intrinsics are picking up the exception @@ -15,16 +19,16 @@ __m128 test_mm_fmadd_ps(__m128 a, __m128 b, __m128 c) { // COMMON-LABEL: test_mm_fmadd_ps - // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CONSTRAINED: call {{.*}}<4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfmadd213ps return _mm_fmadd_ps(a, b, c); } __m128d test_mm_fmadd_pd(__m128d a, __m128d b, __m128d c) { // COMMON-LABEL: test_mm_fmadd_pd - // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> 
%{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CONSTRAINED: call {{.*}}<2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfmadd213pd return _mm_fmadd_pd(a, b, c); } @@ -56,8 +60,8 @@ __m128d test_mm_fmadd_sd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_fmsub_ps(__m128 a, __m128 b, __m128 c) { // COMMON-LABEL: test_mm_fmsub_ps // COMMONIR: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CONSTRAINED: call {{.*}}<4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfmsub213ps return _mm_fmsub_ps(a, b, c); } @@ -65,8 +69,8 @@ __m128 test_mm_fmsub_ps(__m128 a, __m128 b, __m128 c) { __m128d test_mm_fmsub_pd(__m128d a, __m128d b, __m128d c) { // COMMON-LABEL: test_mm_fmsub_pd // COMMONIR: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> 
%{{.*}}, <2 x double> %{{.*}}) + // CONSTRAINED: call {{.*}}<2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfmsub213pd return _mm_fmsub_pd(a, b, c); } @@ -100,8 +104,8 @@ __m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) { // COMMON-LABEL: test_mm_fnmadd_ps // COMMONIR: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CONSTRAINED: call {{.*}}<4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfnmadd213ps return _mm_fnmadd_ps(a, b, c); } @@ -109,8 +113,8 @@ __m128 test_mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) { __m128d test_mm_fnmadd_pd(__m128d a, __m128d b, __m128d c) { // COMMON-LABEL: test_mm_fnmadd_pd // COMMONIR: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CONSTRAINED: call {{.*}}<2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfnmadd213pd return _mm_fnmadd_pd(a, b, c); } @@ -145,8 +149,8 @@ __m128 
test_mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) { // COMMON-LABEL: test_mm_fnmsub_ps // COMMONIR: [[NEG:%.+]] = fneg <4 x float> %{{.+}} // COMMONIR: [[NEG2:%.+]] = fneg <4 x float> %{{.+}} - // UNCONSTRAINED: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) - // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CONSTRAINED: call {{.*}}<4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfnmsub213ps return _mm_fnmsub_ps(a, b, c); } @@ -155,8 +159,8 @@ __m128d test_mm_fnmsub_pd(__m128d a, __m128d b, __m128d c) { // COMMON-LABEL: test_mm_fnmsub_pd // COMMONIR: [[NEG:%.+]] = fneg <2 x double> %{{.+}} // COMMONIR: [[NEG2:%.+]] = fneg <2 x double> %{{.+}} - // UNCONSTRAINED: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) - // CONSTRAINED: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CONSTRAINED: call {{.*}}<2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfnmsub213pd return _mm_fnmsub_pd(a, b, c); } @@ -192,7 +196,7 @@ __m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) { // COMMON-LABEL: test_mm_fmaddsub_ps // COMMONIR-NOT: fneg - // COMMONIR: tail call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // 
COMMONIR: tail call {{.*}}<4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK-ASM: vfmaddsub213ps return _mm_fmaddsub_ps(a, b, c); } @@ -200,7 +204,7 @@ __m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) { __m128d test_mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c) { // COMMON-LABEL: test_mm_fmaddsub_pd // COMMONIR-NOT: fneg - // COMMONIR: tail call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // COMMONIR: tail call {{.*}}<2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) // CHECK-ASM: vfmaddsub213pd return _mm_fmaddsub_pd(a, b, c); } @@ -208,7 +212,7 @@ __m128d test_mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c) { // COMMON-LABEL: test_mm_fmsubadd_ps // COMMONIR: [[FNEG:%.+]] = fneg <4 x float> %{{.*}} - // COMMONIR: tail call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[FNEG]]) + // COMMONIR: tail call {{.*}}<4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[FNEG]]) // CHECK-ASM: vfmsubadd213ps return _mm_fmsubadd_ps(a, b, c); } @@ -216,23 +220,23 @@ __m128 test_mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c) { __m128d test_mm_fmsubadd_pd(__m128d a, __m128d b, __m128d c) { // COMMON-LABEL: test_mm_fmsubadd_pd // COMMONIR: [[FNEG:%.+]] = fneg <2 x double> %{{.*}} - // COMMONIR: tail call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[FNEG]]) + // COMMONIR: tail call {{.*}}<2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[FNEG]]) // CHECK-ASM: vfmsubadd213pd return _mm_fmsubadd_pd(a, b, c); } __m256 test_mm256_fmadd_ps(__m256 a, __m256 b, __m256 c) { // COMMON-LABEL: test_mm256_fmadd_ps - // UNCONSTRAINED: call <8 x float> 
@llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CONSTRAINED: call {{.*}}<8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfmadd213ps return _mm256_fmadd_ps(a, b, c); } __m256d test_mm256_fmadd_pd(__m256d a, __m256d b, __m256d c) { // COMMON-LABEL: test_mm256_fmadd_pd - // UNCONSTRAINED: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CONSTRAINED: call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CONSTRAINED: call {{.*}}<4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfmadd213pd return _mm256_fmadd_pd(a, b, c); } @@ -240,8 +244,8 @@ __m256d test_mm256_fmadd_pd(__m256d a, __m256d b, __m256d c) { __m256 test_mm256_fmsub_ps(__m256 a, __m256 b, __m256 c) { // COMMON-LABEL: test_mm256_fmsub_ps // COMMONIR: [[NEG:%.+]] = fneg <8 x float> %{{.*}} - // UNCONSTRAINED: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CONSTRAINED: call {{.*}}<8 x float> 
@llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfmsub213ps return _mm256_fmsub_ps(a, b, c); } @@ -249,8 +253,8 @@ __m256 test_mm256_fmsub_ps(__m256 a, __m256 b, __m256 c) { __m256d test_mm256_fmsub_pd(__m256d a, __m256d b, __m256d c) { // COMMON-LABEL: test_mm256_fmsub_pd // COMMONIR: [[NEG:%.+]] = fneg <4 x double> %{{.+}} - // UNCONSTRAINED: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CONSTRAINED: call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CONSTRAINED: call {{.*}}<4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfmsub213pd return _mm256_fmsub_pd(a, b, c); } @@ -258,8 +262,8 @@ __m256d test_mm256_fmsub_pd(__m256d a, __m256d b, __m256d c) { __m256 test_mm256_fnmadd_ps(__m256 a, __m256 b, __m256 c) { // COMMON-LABEL: test_mm256_fnmadd_ps // COMMONIR: [[NEG:%.+]] = fneg <8 x float> %{{.*}} - // UNCONSTRAINED: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CONSTRAINED: call {{.*}}<8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfnmadd213ps return _mm256_fnmadd_ps(a, b, c); } @@ -267,8 +271,8 @@ __m256 test_mm256_fnmadd_ps(__m256 a, __m256 b, __m256 c) { __m256d 
test_mm256_fnmadd_pd(__m256d a, __m256d b, __m256d c) { // COMMON-LABEL: test_mm256_fnmadd_pd // COMMONIR: [[NEG:%.+]] = fneg <4 x double> %{{.+}} - // UNCONSTRAINED: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CONSTRAINED: call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CONSTRAINED: call {{.*}}<4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfnmadd213pd return _mm256_fnmadd_pd(a, b, c); } @@ -277,8 +281,8 @@ __m256 test_mm256_fnmsub_ps(__m256 a, __m256 b, __m256 c) { // COMMON-LABEL: test_mm256_fnmsub_ps // COMMONIR: [[NEG:%.+]] = fneg <8 x float> %{{.*}} // COMMONIR: [[NEG2:%.+]] = fneg <8 x float> %{{.*}} - // UNCONSTRAINED: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) - // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CONSTRAINED: call {{.*}}<8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfnmsub213ps return _mm256_fnmsub_ps(a, b, c); } @@ -287,8 +291,8 @@ __m256d test_mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c) { // COMMON-LABEL: test_mm256_fnmsub_pd // COMMONIR: [[NEG:%.+]] = fneg <4 x double> %{{.+}} // COMMONIR: [[NEG2:%.+]] = fneg <4 x double> %{{.+}} - // UNCONSTRAINED: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) - // CONSTRAINED: call 
<4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}}) + // UNCONSTRAINED: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CONSTRAINED: call {{.*}}<4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !{{.*}}) // CHECK-ASM: vfnmsub213pd return _mm256_fnmsub_pd(a, b, c); } @@ -296,7 +300,7 @@ __m256d test_mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c) { __m256 test_mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c) { // COMMON-LABEL: test_mm256_fmaddsub_ps // COMMONIR-NOT: fneg - // COMMONIR: tail call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // COMMONIR: tail call {{.*}}<8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) // CHECK-ASM: vfmaddsub213ps return _mm256_fmaddsub_ps(a, b, c); } @@ -304,7 +308,7 @@ __m256 test_mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c) { __m256d test_mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c) { // COMMON-LABEL: test_mm256_fmaddsub_pd // COMMONIR-NOT: fneg - // COMMONIR: tail call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // COMMONIR: tail call {{.*}}<4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) // CHECK-ASM: vfmaddsub213pd return _mm256_fmaddsub_pd(a, b, c); } @@ -312,7 +316,7 @@ __m256d test_mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c) { __m256 test_mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c) { // COMMON-LABEL: test_mm256_fmsubadd_ps // COMMONIR: [[FNEG:%.+]] = fneg <8 x float> %{{.*}} - // COMMONIR: tail call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[FNEG]]) + // COMMONIR: tail call {{.*}}<8 x 
float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[FNEG]]) // CHECK-ASM: vfmsubadd213ps return _mm256_fmsubadd_ps(a, b, c); } @@ -320,7 +324,7 @@ __m256 test_mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c) { __m256d test_mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c) { // COMMON-LABEL: test_mm256_fmsubadd_pd // COMMONIR: [[FNEG:%.+]] = fneg <4 x double> %{{.*}} - // COMMONIR: tail call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[FNEG]]) + // COMMONIR: tail call {{.*}}<4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[FNEG]]) // CHECK-ASM: vfmsubadd213pd return _mm256_fmsubadd_pd(a, b, c); } diff --git a/clang/test/CodeGen/X86/fma-builtins.c b/clang/test/CodeGen/X86/fma-builtins.c index 4686b3686a4f8f6..aa17dcc62fbc067 100644 --- a/clang/test/CodeGen/X86/fma-builtins.c +++ b/clang/test/CodeGen/X86/fma-builtins.c @@ -1,17 +1,20 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s #include __m128 test_mm_fmadd_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fmadd_ps - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> 
@llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_fmadd_ps(a, b, c); } __m128d test_mm_fmadd_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fmadd_pd - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_fmadd_pd(a, b, c); } @@ -38,14 +41,14 @@ __m128d test_mm_fmadd_sd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_fmsub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fmsub_ps // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_fmsub_ps(a, b, c); } __m128d test_mm_fmsub_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fmsub_pd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_fmsub_pd(a, b, c); } @@ -74,14 +77,14 @@ __m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fnmadd_ps // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_fnmadd_ps(a, b, c); } __m128d test_mm_fnmadd_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fnmadd_pd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: call <2 x double> 
@llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_fnmadd_pd(a, b, c); } @@ -111,7 +114,7 @@ __m128 test_mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fnmsub_ps // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_fnmsub_ps(a, b, c); } @@ -119,7 +122,7 @@ __m128d test_mm_fnmsub_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fnmsub_pd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_fnmsub_pd(a, b, c); } @@ -150,68 +153,68 @@ __m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fmaddsub_ps // CHECK-NOT: fneg - // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_fmaddsub_ps(a, b, c); } __m128d test_mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fmaddsub_pd // CHECK-NOT: fneg - // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> 
%{{.*}}, <2 x double> %{{.*}}) return _mm_fmaddsub_pd(a, b, c); } __m128 test_mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fmsubadd_ps // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]) + // CHECK: call {{.*}}<4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]) return _mm_fmsubadd_ps(a, b, c); } __m128d test_mm_fmsubadd_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fmsubadd_pd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]) + // CHECK: call {{.*}}<2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]) return _mm_fmsubadd_pd(a, b, c); } __m256 test_mm256_fmadd_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_fmadd_ps - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_fmadd_ps(a, b, c); } __m256d test_mm256_fmadd_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_fmadd_pd - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_fmadd_pd(a, b, c); } __m256 test_mm256_fmsub_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_fmsub_ps // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> 
%{{.*}}) return _mm256_fmsub_ps(a, b, c); } __m256d test_mm256_fmsub_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_fmsub_pd // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_fmsub_pd(a, b, c); } __m256 test_mm256_fnmadd_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_fnmadd_ps // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_fnmadd_ps(a, b, c); } __m256d test_mm256_fnmadd_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_fnmadd_pd // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_fnmadd_pd(a, b, c); } @@ -219,7 +222,7 @@ __m256 test_mm256_fnmsub_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_fnmsub_ps // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_fnmsub_ps(a, b, c); } @@ -227,34 +230,34 @@ __m256d test_mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_fnmsub_pd // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}} // CHECK: [[NEG2:%.+]] = fneg <4 x double> %{{.+}} - // CHECK: call <4 x 
double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_fnmsub_pd(a, b, c); } __m256 test_mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_fmaddsub_ps // CHECK-NOT: fneg - // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_fmaddsub_ps(a, b, c); } __m256d test_mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_fmaddsub_pd // CHECK-NOT: fneg - // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_fmaddsub_pd(a, b, c); } __m256 test_mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_fmsubadd_ps // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.+}} - // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]) + // CHECK: call {{.*}}<8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]) return _mm256_fmsubadd_ps(a, b, c); } __m256d test_mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_fmsubadd_pd // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}} - // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]]) + // CHECK: call {{.*}}<4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]]) return _mm256_fmsubadd_pd(a, b, c); } diff --git 
a/clang/test/CodeGen/X86/fma4-builtins.c b/clang/test/CodeGen/X86/fma4-builtins.c index 94dcaf61f2681aa..ccdba8fea87b50b 100644 --- a/clang/test/CodeGen/X86/fma4-builtins.c +++ b/clang/test/CodeGen/X86/fma4-builtins.c @@ -1,18 +1,20 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s #include __m128 test_mm_macc_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_macc_ps - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_macc_ps(a, b, c); } __m128d test_mm_macc_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_macc_pd - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_macc_pd(a, b, c); } @@ -39,14 +41,14 @@ __m128d test_mm_macc_sd(__m128d a, __m128d b, 
__m128d c) { __m128 test_mm_msub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_msub_ps // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_msub_ps(a, b, c); } __m128d test_mm_msub_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_msub_pd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_msub_pd(a, b, c); } @@ -75,14 +77,14 @@ __m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_nmacc_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_nmacc_ps // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_nmacc_ps(a, b, c); } __m128d test_mm_nmacc_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_nmacc_pd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_nmacc_pd(a, b, c); } @@ -112,7 +114,7 @@ __m128 test_mm_nmsub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_nmsub_ps // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + 
// CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_nmsub_ps(a, b, c); } @@ -120,7 +122,7 @@ __m128d test_mm_nmsub_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_nmsub_pd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_nmsub_pd(a, b, c); } @@ -151,68 +153,68 @@ __m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_maddsub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_maddsub_ps // CHECK-NOT: fneg - // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + // CHECK: call {{.*}}<4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_maddsub_ps(a, b, c); } __m128d test_mm_maddsub_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_maddsub_pd // CHECK-NOT: fneg - // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) + // CHECK: call {{.*}}<2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_maddsub_pd(a, b, c); } __m128 test_mm_msubadd_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_msubadd_ps // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]) + // CHECK: call {{.*}}<4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]]) return _mm_msubadd_ps(a, b, c); } __m128d test_mm_msubadd_pd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: 
test_mm_msubadd_pd // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]) + // CHECK: call {{.*}}<2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]]) return _mm_msubadd_pd(a, b, c); } __m256 test_mm256_macc_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_macc_ps - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_macc_ps(a, b, c); } __m256d test_mm256_macc_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_macc_pd - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_macc_pd(a, b, c); } __m256 test_mm256_msub_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_msub_ps // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_msub_ps(a, b, c); } __m256d test_mm256_msub_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_msub_pd // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_msub_pd(a, b, c); } __m256 test_mm256_nmacc_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_nmacc_ps // CHECK: [[NEG:%.+]] = fneg 
<8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_nmacc_ps(a, b, c); } __m256d test_mm256_nmacc_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_nmacc_pd // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_nmacc_pd(a, b, c); } @@ -220,7 +222,7 @@ __m256 test_mm256_nmsub_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_nmsub_ps // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}} // CHECK: [[NEG2:%.+]] = fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_nmsub_ps(a, b, c); } @@ -228,34 +230,34 @@ __m256d test_mm256_nmsub_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_nmsub_pd // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}} // CHECK: [[NEG2:%.+]] = fneg <4 x double> %{{.+}} - // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_nmsub_pd(a, b, c); } __m256 test_mm256_maddsub_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_maddsub_ps // CHECK-NOT: fneg - // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}) + // CHECK: call {{.*}}<8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, 
<8 x float> %{{.*}}) return _mm256_maddsub_ps(a, b, c); } __m256d test_mm256_maddsub_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_maddsub_pd // CHECK-NOT: fneg - // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) + // CHECK: call {{.*}}<4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_maddsub_pd(a, b, c); } __m256 test_mm256_msubadd_ps(__m256 a, __m256 b, __m256 c) { // CHECK-LABEL: test_mm256_msubadd_ps // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}} - // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]) + // CHECK: call {{.*}}<8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]]) return _mm256_msubadd_ps(a, b, c); } __m256d test_mm256_msubadd_pd(__m256d a, __m256d b, __m256d c) { // CHECK-LABEL: test_mm256_msubadd_pd // CHECK: [[NEG:%.+]] = fneg <4 x double> {{.+}} - // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]]) + // CHECK: call {{.*}}<4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]]) return _mm256_msubadd_pd(a, b, c); } diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 495ae7e1811592a..280faa0274cb85d 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx // RUN: 
%clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx #include diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index a5348761ec84e0e..b16d610cabe6bb0 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1778,3 +1778,16 @@ __m128i test_mm_xor_si128(__m128i A, __m128i B) { // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} return _mm_xor_si128(A, B); } + +// Test constexpr handling. +#if defined(__cplusplus) && (__cplusplus >= 201103L) + +void test_constexpr() { + constexpr __m128d v_mm_setzero_pd = _mm_setzero_pd(); + static_assert(v_mm_setzero_pd[0] == +0.0 && v_mm_setzero_pd[1] == +0.0); + + constexpr __m128i v_mm_setzero_si128 = _mm_setzero_si128(); + static_assert(v_mm_setzero_si128[0] == 0x0000000000000000ULL && v_mm_setzero_si128[1] == 0x0000000000000000ULL); +} + +#endif diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c index 57a18dadefaa894..18c062f4c14a7db 100644 --- a/clang/test/CodeGen/X86/sse3-builtins.c +++ b/clang/test/CodeGen/X86/sse3-builtins.c @@ -75,3 +75,22 @@ __m128 test_mm_moveldup_ps(__m128 A) { // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> return _mm_moveldup_ps(A); } + +// Test constexpr handling. 
+#if defined(__cplusplus) && (__cplusplus >= 201103L) + +void test_constexpr() { + constexpr __m128d kd1 {+7.0,-7.0}; + constexpr __m128 kf1 {+1.0f,-1.0f,+2.0f,+4.0f}; + + constexpr __m128d v_mm_movedup_pd = _mm_movedup_pd(kd1); + static_assert(v_mm_movedup_pd[0] == +7.0 && v_mm_movedup_pd[1] == +7.0); + + constexpr __m128 v_mm_movehdup_ps = _mm_movehdup_ps(kf1); + static_assert(v_mm_movehdup_ps[0] == -1.0f && v_mm_movehdup_ps[1] == -1.0f && v_mm_movehdup_ps[2] == +4.0f && v_mm_movehdup_ps[3] == +4.0f); + + constexpr __m128 v_mm_moveldup_ps = _mm_moveldup_ps(kf1); + static_assert(v_mm_moveldup_ps[0] == +1.0f && v_mm_moveldup_ps[1] == +1.0f && v_mm_moveldup_ps[2] == +2.0f && v_mm_moveldup_ps[3] == +2.0f); +} + +#endif diff --git a/clang/test/CodeGen/aarch64-fmv-dependencies.c b/clang/test/CodeGen/aarch64-fmv-dependencies.c index 681f7e82634fa8a..f4229a5d233970f 100644 --- a/clang/test/CodeGen/aarch64-fmv-dependencies.c +++ b/clang/test/CodeGen/aarch64-fmv-dependencies.c @@ -135,12 +135,9 @@ __attribute__((target_version("sme-i16i64"))) int fmv(void) { return 0; } // CHECK: define dso_local i32 @fmv._Msme2() #[[sme2:[0-9]+]] { __attribute__((target_version("sme2"))) int fmv(void) { return 0; } -// CHECK: define dso_local i32 @fmv._Mssbs() #[[ATTR0:[0-9]+]] { +// CHECK: define dso_local i32 @fmv._Mssbs() #[[ssbs:[0-9]+]] { __attribute__((target_version("ssbs"))) int fmv(void) { return 0; } -// CHECK: define dso_local i32 @fmv._Mssbs2() #[[ssbs2:[0-9]+]] { -__attribute__((target_version("ssbs2"))) int fmv(void) { return 0; } - // CHECK: define dso_local i32 @fmv._Msve() #[[sve:[0-9]+]] { __attribute__((target_version("sve"))) int fmv(void) { return 0; } @@ -219,7 +216,7 @@ int caller() { // CHECK: attributes #[[sme_f64f64]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+neon,+outline-atomics,+sme,+sme-f64f64,+v8a" // CHECK: attributes #[[sme_i16i64]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+neon,+outline-atomics,+sme,+sme-i16i64,+v8a" // CHECK: attributes 
#[[sme2]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+neon,+outline-atomics,+sme,+sme2,+v8a" -// CHECK: attributes #[[ssbs2]] = { {{.*}} "target-features"="+fp-armv8,+neon,+outline-atomics,+ssbs,+v8a" +// CHECK: attributes #[[ssbs]] = { {{.*}} "target-features"="+fp-armv8,+neon,+outline-atomics,+ssbs,+v8a" // CHECK: attributes #[[sve]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a" // CHECK: attributes #[[sve_bf16_ebf16]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a" // CHECK: attributes #[[sve_i8mm]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+i8mm,+neon,+outline-atomics,+sve,+v8a" diff --git a/clang/test/CodeGen/aarch64-mixed-target-attributes.c b/clang/test/CodeGen/aarch64-mixed-target-attributes.c index d779abd395b5fb1..f9c1ae5b75a7242 100644 --- a/clang/test/CodeGen/aarch64-mixed-target-attributes.c +++ b/clang/test/CodeGen/aarch64-mixed-target-attributes.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs --global-value-regex ".*" // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -v9.5a -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fmv -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV @@ -255,18 +255,6 @@ __attribute__((target_version("jscvt"))) int default_def_with_version_decls(void // CHECK-NOFMV-NEXT: ret i32 0 // //. 
-// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.5a" } -// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+jsconv,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+fp-armv8,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse,-v9.5a" } -// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rdm,-v9.5a" } -// CHECK: attributes #[[ATTR5:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+fp-armv8,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR6:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.5a" } -// CHECK: attributes #[[ATTR7:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse,-v9.5a" } -// CHECK: attributes #[[ATTR8:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+jsconv,+neon,-v9.5a" } -//. -// CHECK-NOFMV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } -//. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. 
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c index b7ad0e19e53eb13..426e3d82e5e3411 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -2,9 +2,9 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c index 75612aa9fdfc4ad..202c90a9b3512c7 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c @@ -2,9 +2,9 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: 
%clang_cc1 -triple aarch64 -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c index 274e05de594b8ea..292e544139e3ffa 100644 --- a/clang/test/CodeGen/attr-target-clones-aarch64.c +++ b/clang/test/CodeGen/attr-target-clones-aarch64.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs --global-value-regex ".*" // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fmv -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +mte -target-feature +bti -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-MTE-BTI @@ -47,18 +47,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI: @ftc_inline3 = weak_odr 
ifunc i32 (), ptr @ftc_inline3.resolver //. // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc._MaesMlse( +// CHECK-LABEL: define {{[^@]+}}@ftc._MaesMlse +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 0 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc._Msve2( +// CHECK-LABEL: define {{[^@]+}}@ftc._Msve2 +// CHECK-SAME: () #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 0 // // -// CHECK-LABEL: @ftc.resolver( +// CHECK-LABEL: define {{[^@]+}}@ftc.resolver() comdat { // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -81,18 +83,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_def._Msha2( +// CHECK-LABEL: define {{[^@]+}}@ftc_def._Msha2 +// CHECK-SAME: () #[[ATTR2:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 1 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_def._Mmemtag2Msha2( +// CHECK-LABEL: define {{[^@]+}}@ftc_def._Mmemtag2Msha2 +// CHECK-SAME: () #[[ATTR3:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 1 // // -// CHECK-LABEL: @ftc_def.resolver( +// CHECK-LABEL: define {{[^@]+}}@ftc_def.resolver() comdat { // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -115,12 +119,13 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_dup1._Msha2( +// CHECK-LABEL: define {{[^@]+}}@ftc_dup1._Msha2 +// CHECK-SAME: () #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 2 // // -// CHECK-LABEL: @ftc_dup1.resolver( +// CHECK-LABEL: define 
{{[^@]+}}@ftc_dup1.resolver() comdat { // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -135,18 +140,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_dup2._Mfp( +// CHECK-LABEL: define {{[^@]+}}@ftc_dup2._Mfp +// CHECK-SAME: () #[[ATTR4:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 3 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_dup2._McrcMdotprod( +// CHECK-LABEL: define {{[^@]+}}@ftc_dup2._McrcMdotprod +// CHECK-SAME: () #[[ATTR5:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 3 // // -// CHECK-LABEL: @ftc_dup2.resolver( +// CHECK-LABEL: define {{[^@]+}}@ftc_dup2.resolver() comdat { // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -169,18 +176,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_dup3._Mmemtag2( +// CHECK-LABEL: define {{[^@]+}}@ftc_dup3._Mmemtag2 +// CHECK-SAME: () #[[ATTR6:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 4 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_dup3._Mbti( +// CHECK-LABEL: define {{[^@]+}}@ftc_dup3._Mbti +// CHECK-SAME: () #[[ATTR7:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 4 // // -// CHECK-LABEL: @ftc_dup3.resolver( +// CHECK-LABEL: define {{[^@]+}}@ftc_dup3.resolver() comdat { // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -203,7 +212,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK: 
Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @foo( +// CHECK-LABEL: define {{[^@]+}}@foo +// CHECK-SAME: () #[[ATTR8:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CALL:%.*]] = call i32 @ftc() // CHECK-NEXT: [[CALL1:%.*]] = call i32 @ftc_def() @@ -218,13 +228,15 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_direct( +// CHECK-LABEL: define {{[^@]+}}@ftc_direct +// CHECK-SAME: () #[[ATTR8]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 4 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @main( +// CHECK-LABEL: define {{[^@]+}}@main +// CHECK-SAME: () #[[ATTR8]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 @@ -239,54 +251,62 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc.default( +// CHECK-LABEL: define {{[^@]+}}@ftc.default +// CHECK-SAME: () #[[ATTR8]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 0 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_def.default( +// CHECK-LABEL: define {{[^@]+}}@ftc_def.default +// CHECK-SAME: () #[[ATTR8]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 1 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_dup1.default( +// CHECK-LABEL: define {{[^@]+}}@ftc_dup1.default +// CHECK-SAME: () #[[ATTR8]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 2 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_dup2.default( +// CHECK-LABEL: define {{[^@]+}}@ftc_dup2.default +// CHECK-SAME: () #[[ATTR8]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 3 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_dup3.default( +// CHECK-LABEL: define {{[^@]+}}@ftc_dup3.default +// 
CHECK-SAME: () #[[ATTR8]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 4 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_inline2._Mfp16( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline2._Mfp16 +// CHECK-SAME: () #[[ATTR9:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 2 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_inline2._MfcmaMsve2-bitperm( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline2._MfcmaMsve2-bitperm +// CHECK-SAME: () #[[ATTR10:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 2 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_inline2.default( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline2.default +// CHECK-SAME: () #[[ATTR8]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 2 // // -// CHECK-LABEL: @ftc_inline2.resolver( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline2.resolver() comdat { // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -309,30 +329,34 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_inline1._MrngMsimd( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline1._MrngMsimd +// CHECK-SAME: () #[[ATTR11:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 1 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_inline1._MpredresMrcpc( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline1._MpredresMrcpc +// CHECK-SAME: () #[[ATTR12:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 1 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_inline1._Msve2-aesMwfxt( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline1._Msve2-aesMwfxt +// CHECK-SAME: () #[[ATTR13:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 1 // // // CHECK: Function Attrs: noinline nounwind optnone -// 
CHECK-LABEL: @ftc_inline1.default( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline1.default +// CHECK-SAME: () #[[ATTR8]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 1 // // -// CHECK-LABEL: @ftc_inline1.resolver( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline1.resolver() comdat { // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -363,24 +387,27 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_inline3._Mbti( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline3._Mbti +// CHECK-SAME: () #[[ATTR7]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 3 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_inline3._MsbMsve( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline3._MsbMsve +// CHECK-SAME: () #[[ATTR14:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 3 // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: @ftc_inline3.default( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline3.default +// CHECK-SAME: () #[[ATTR8]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 3 // // -// CHECK-LABEL: @ftc_inline3.resolver( +// CHECK-LABEL: define {{[^@]+}}@ftc_inline3.resolver() comdat { // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -403,37 +430,43 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK-NOFMV: Function Attrs: noinline nounwind optnone -// CHECK-NOFMV-LABEL: @ftc( +// CHECK-NOFMV-LABEL: define {{[^@]+}}@ftc +// CHECK-NOFMV-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NOFMV-NEXT: entry: // CHECK-NOFMV-NEXT: ret i32 0 // // // CHECK-NOFMV: Function Attrs: noinline nounwind optnone -// CHECK-NOFMV-LABEL: @ftc_def( +// CHECK-NOFMV-LABEL: define 
{{[^@]+}}@ftc_def +// CHECK-NOFMV-SAME: () #[[ATTR0]] { // CHECK-NOFMV-NEXT: entry: // CHECK-NOFMV-NEXT: ret i32 1 // // // CHECK-NOFMV: Function Attrs: noinline nounwind optnone -// CHECK-NOFMV-LABEL: @ftc_dup1( +// CHECK-NOFMV-LABEL: define {{[^@]+}}@ftc_dup1 +// CHECK-NOFMV-SAME: () #[[ATTR0]] { // CHECK-NOFMV-NEXT: entry: // CHECK-NOFMV-NEXT: ret i32 2 // // // CHECK-NOFMV: Function Attrs: noinline nounwind optnone -// CHECK-NOFMV-LABEL: @ftc_dup2( +// CHECK-NOFMV-LABEL: define {{[^@]+}}@ftc_dup2 +// CHECK-NOFMV-SAME: () #[[ATTR0]] { // CHECK-NOFMV-NEXT: entry: // CHECK-NOFMV-NEXT: ret i32 3 // // // CHECK-NOFMV: Function Attrs: noinline nounwind optnone -// CHECK-NOFMV-LABEL: @ftc_dup3( +// CHECK-NOFMV-LABEL: define {{[^@]+}}@ftc_dup3 +// CHECK-NOFMV-SAME: () #[[ATTR0]] { // CHECK-NOFMV-NEXT: entry: // CHECK-NOFMV-NEXT: ret i32 4 // // // CHECK-NOFMV: Function Attrs: noinline nounwind optnone -// CHECK-NOFMV-LABEL: @foo( +// CHECK-NOFMV-LABEL: define {{[^@]+}}@foo +// CHECK-NOFMV-SAME: () #[[ATTR0]] { // CHECK-NOFMV-NEXT: entry: // CHECK-NOFMV-NEXT: [[CALL:%.*]] = call i32 @ftc() // CHECK-NOFMV-NEXT: [[CALL1:%.*]] = call i32 @ftc_def() @@ -448,13 +481,15 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK-NOFMV: Function Attrs: noinline nounwind optnone -// CHECK-NOFMV-LABEL: @ftc_direct( +// CHECK-NOFMV-LABEL: define {{[^@]+}}@ftc_direct +// CHECK-NOFMV-SAME: () #[[ATTR0]] { // CHECK-NOFMV-NEXT: entry: // CHECK-NOFMV-NEXT: ret i32 4 // // // CHECK-NOFMV: Function Attrs: noinline nounwind optnone -// CHECK-NOFMV-LABEL: @main( +// CHECK-NOFMV-LABEL: define {{[^@]+}}@main +// CHECK-NOFMV-SAME: () #[[ATTR0]] { // CHECK-NOFMV-NEXT: entry: // CHECK-NOFMV-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK-NOFMV-NEXT: store i32 0, ptr [[RETVAL]], align 4 @@ -469,18 +504,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone 
-// CHECK-MTE-BTI-LABEL: @ftc._MaesMlse( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc._MaesMlse +// CHECK-MTE-BTI-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 0 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc._Msve2( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc._Msve2 +// CHECK-MTE-BTI-SAME: () #[[ATTR1:[0-9]+]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 0 // // -// CHECK-MTE-BTI-LABEL: @ftc.resolver( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc.resolver() comdat { // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -503,18 +540,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_def._Msha2( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_def._Msha2 +// CHECK-MTE-BTI-SAME: () #[[ATTR2:[0-9]+]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 1 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_def._Mmemtag2Msha2( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_def._Mmemtag2Msha2 +// CHECK-MTE-BTI-SAME: () #[[ATTR2]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 1 // // -// CHECK-MTE-BTI-LABEL: @ftc_def.resolver( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_def.resolver() comdat { // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -537,12 +576,13 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_dup1._Msha2( +// CHECK-MTE-BTI-LABEL: define 
{{[^@]+}}@ftc_dup1._Msha2 +// CHECK-MTE-BTI-SAME: () #[[ATTR2]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 2 // // -// CHECK-MTE-BTI-LABEL: @ftc_dup1.resolver( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_dup1.resolver() comdat { // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -557,18 +597,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_dup2._Mfp( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_dup2._Mfp +// CHECK-MTE-BTI-SAME: () #[[ATTR3:[0-9]+]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 3 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_dup2._McrcMdotprod( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_dup2._McrcMdotprod +// CHECK-MTE-BTI-SAME: () #[[ATTR4:[0-9]+]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 3 // // -// CHECK-MTE-BTI-LABEL: @ftc_dup2.resolver( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_dup2.resolver() comdat { // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -591,18 +633,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_dup3._Mmemtag2( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_dup3._Mmemtag2 +// CHECK-MTE-BTI-SAME: () #[[ATTR5:[0-9]+]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 4 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_dup3._Mbti( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_dup3._Mbti +// CHECK-MTE-BTI-SAME: () 
#[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 4 // // -// CHECK-MTE-BTI-LABEL: @ftc_dup3.resolver( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_dup3.resolver() comdat { // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -625,7 +669,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @foo( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@foo +// CHECK-MTE-BTI-SAME: () #[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: [[CALL:%.*]] = call i32 @ftc() // CHECK-MTE-BTI-NEXT: [[CALL1:%.*]] = call i32 @ftc_def() @@ -640,13 +685,15 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_direct( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_direct +// CHECK-MTE-BTI-SAME: () #[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 4 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @main( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@main +// CHECK-MTE-BTI-SAME: () #[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK-MTE-BTI-NEXT: store i32 0, ptr [[RETVAL]], align 4 @@ -661,54 +708,62 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc.default( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc.default +// CHECK-MTE-BTI-SAME: () #[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 0 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_def.default( +// 
CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_def.default +// CHECK-MTE-BTI-SAME: () #[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 1 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_dup1.default( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_dup1.default +// CHECK-MTE-BTI-SAME: () #[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 2 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_dup2.default( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_dup2.default +// CHECK-MTE-BTI-SAME: () #[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 3 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_dup3.default( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_dup3.default +// CHECK-MTE-BTI-SAME: () #[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 4 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_inline2._Mfp16( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_inline2._Mfp16 +// CHECK-MTE-BTI-SAME: () #[[ATTR6:[0-9]+]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 2 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_inline2._MfcmaMsve2-bitperm( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_inline2._MfcmaMsve2-bitperm +// CHECK-MTE-BTI-SAME: () #[[ATTR7:[0-9]+]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 2 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_inline2.default( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_inline2.default +// CHECK-MTE-BTI-SAME: () #[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 2 // // -// CHECK-MTE-BTI-LABEL: @ftc_inline2.resolver( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_inline2.resolver() comdat { // CHECK-MTE-BTI-NEXT: 
resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -731,30 +786,34 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_inline1._MrngMsimd( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_inline1._MrngMsimd +// CHECK-MTE-BTI-SAME: () #[[ATTR8:[0-9]+]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 1 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_inline1._MpredresMrcpc( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_inline1._MpredresMrcpc +// CHECK-MTE-BTI-SAME: () #[[ATTR9:[0-9]+]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 1 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_inline1._Msve2-aesMwfxt( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_inline1._Msve2-aesMwfxt +// CHECK-MTE-BTI-SAME: () #[[ATTR10:[0-9]+]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 1 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_inline1.default( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_inline1.default +// CHECK-MTE-BTI-SAME: () #[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 1 // // -// CHECK-MTE-BTI-LABEL: @ftc_inline1.resolver( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_inline1.resolver() comdat { // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -785,24 +844,27 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_inline3._Mbti( +// CHECK-MTE-BTI-LABEL: 
define {{[^@]+}}@ftc_inline3._Mbti +// CHECK-MTE-BTI-SAME: () #[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 3 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_inline3._MsbMsve( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_inline3._MsbMsve +// CHECK-MTE-BTI-SAME: () #[[ATTR11:[0-9]+]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 3 // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone -// CHECK-MTE-BTI-LABEL: @ftc_inline3.default( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_inline3.default +// CHECK-MTE-BTI-SAME: () #[[ATTR5]] { // CHECK-MTE-BTI-NEXT: entry: // CHECK-MTE-BTI-NEXT: ret i32 3 // // -// CHECK-MTE-BTI-LABEL: @ftc_inline3.resolver( +// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_inline3.resolver() comdat { // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 @@ -824,38 +886,6 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3.default // //. 
-// CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+lse,+neon" } -// CHECK: attributes #[[ATTR1:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2" } -// CHECK: attributes #[[ATTR2:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sha2" } -// CHECK: attributes #[[ATTR3:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+mte,+neon,+sha2" } -// CHECK: attributes #[[ATTR4:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" } -// CHECK: attributes #[[ATTR5:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+dotprod,+fp-armv8,+neon" } -// CHECK: attributes #[[ATTR6:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+mte" } -// CHECK: attributes #[[ATTR7:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti" } -// CHECK: attributes #[[ATTR8:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CHECK: attributes #[[ATTR9:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon" } -// CHECK: attributes #[[ATTR10:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+complxnum,+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-bitperm" } -// CHECK: attributes #[[ATTR11:[0-9]+]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rand" } -// CHECK: attributes #[[ATTR12:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+predres,+rcpc" } -// CHECK: attributes #[[ATTR13:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+wfxt" } -// CHECK: attributes #[[ATTR14:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sb,+sve" } -//. -// CHECK-NOFMV: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } -// CHECK-NOFMV: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } -//. -// CHECK-MTE-BTI: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+fp-armv8,+lse,+mte,+neon" } -// CHECK-MTE-BTI: attributes #[[ATTR1:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+fp-armv8,+fullfp16,+mte,+neon,+sve,+sve2" } -// CHECK-MTE-BTI: attributes #[[ATTR2:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+fp-armv8,+mte,+neon,+sha2" } -// CHECK-MTE-BTI: attributes #[[ATTR3:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+fp-armv8,+mte,+neon" } -// CHECK-MTE-BTI: attributes #[[ATTR4:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+crc,+dotprod,+fp-armv8,+mte,+neon" } -// CHECK-MTE-BTI: attributes #[[ATTR5:[0-9]+]] 
= { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+mte" } -// CHECK-MTE-BTI: attributes #[[ATTR6:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+fp-armv8,+fullfp16,+mte,+neon" } -// CHECK-MTE-BTI: attributes #[[ATTR7:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+complxnum,+fp-armv8,+fullfp16,+mte,+neon,+sve,+sve2,+sve2-bitperm" } -// CHECK-MTE-BTI: attributes #[[ATTR8:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+fp-armv8,+mte,+neon,+rand" } -// CHECK-MTE-BTI: attributes #[[ATTR9:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+mte,+predres,+rcpc" } -// CHECK-MTE-BTI: attributes #[[ATTR10:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+fp-armv8,+fullfp16,+mte,+neon,+sve,+sve2,+sve2-aes,+wfxt" } -// CHECK-MTE-BTI: attributes #[[ATTR11:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,+fp-armv8,+fullfp16,+mte,+neon,+sb,+sve" } -//. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. 
diff --git a/clang/test/CodeGen/attr-target-version-riscv-invalid.c b/clang/test/CodeGen/attr-target-version-riscv-invalid.c new file mode 100644 index 000000000000000..0948b3bfd9ef323 --- /dev/null +++ b/clang/test/CodeGen/attr-target-version-riscv-invalid.c @@ -0,0 +1,13 @@ +// RUN: not %clang_cc1 -triple riscv64 -target-feature +i -emit-llvm -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-UNSUPPORT-OS + +// CHECK-UNSUPPORT-OS: error: function multiversioning is currently only supported on Linux +__attribute__((target_version("default"))) int foo(void) { + return 2; +} + +__attribute__((target_version("arch=+c"))) int foo(void) { + return 2; +} + + +int bar() { return foo(); } diff --git a/clang/test/CodeGen/attr-target-version-riscv.c b/clang/test/CodeGen/attr-target-version-riscv.c new file mode 100644 index 000000000000000..7d0e61e61542f2f --- /dev/null +++ b/clang/test/CodeGen/attr-target-version-riscv.c @@ -0,0 +1,443 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --version 4 +// RUN: %clang_cc1 -triple riscv64-linux-gnu -target-feature +i -emit-llvm -o - %s | FileCheck %s + +__attribute__((target_version("arch=+v"))) int foo1(void) { return 1; } +__attribute__((target_version("default"))) int foo1(void) { return 1; } + +__attribute__((target_version("arch=+zbb"))) int foo2(void) { return 1; } +__attribute__((target_version("arch=+m"))) int foo2(void) { return 1; } +__attribute__((target_version("default"))) int foo2(void) { return 1; } + +__attribute__((target_version("arch=+zbb,+c"))) int foo3(void) { return 1; } +__attribute__((target_version("arch=+m"))) int foo3(void) { return 1; } +__attribute__((target_version("default"))) int foo3(void) { return 1; } + +__attribute__((target_version("arch=+zba"))) int foo4(void) { return 1; } +__attribute__((target_version("arch=+zbb"))) int foo4(void) { return 1; } +__attribute__((target_version("arch=+zbb,+zba"))) int 
foo4(void) { return 1; } +__attribute__((target_version("default"))) int foo4(void) { return 1; } + +__attribute__((target_version("arch=+zba"))) int foo5(void) { return 1; } +__attribute__((target_version("arch=+zbb,+zba"))) int foo5(void) { return 1; } +__attribute__((target_version("arch=+zbb"))) int foo5(void) { return 1; } +__attribute__((target_version("default"))) int foo5(void) { return 1; } + +__attribute__((target_version("arch=+zba"))) int foo6(void) { return 1; } +__attribute__((target_version("arch=+zbb"))) int foo6(void) { return 1; } +__attribute__((target_version("arch=+zbb,+zba;priority=10"))) int foo6(void) { return 1; } +__attribute__((target_version("default"))) int foo6(void) { return 1; } + +__attribute__((target_version("priority=8;arch=+zba"))) int foo7(void) { return 1; } +__attribute__((target_version("arch=+zbb;priority=9"))) int foo7(void) { return 1; } +__attribute__((target_version("arch=+zbb,+zba;priority=10"))) int foo7(void) { return 1; } +__attribute__((target_version("default"))) int foo7(void) { return 1; } + +__attribute__((target_version("priority=-1;arch=+zba"))) int foo8(void) { return 1; } +__attribute__((target_version("arch=+zbb;priority=-2"))) int foo8(void) { return 1; } +__attribute__((target_version("arch=+zbb,+zba;priority=3"))) int foo8(void) { return 1; } +__attribute__((target_version("default"))) int foo8(void) { return 1; } + +int bar() { return foo1() + foo2() + foo3() + foo4() + foo5() + foo6() + foo7() + foo8(); } +//. 
+// CHECK: @__riscv_feature_bits = external dso_local global { i32, [2 x i64] } +// CHECK: @foo1 = weak_odr ifunc i32 (), ptr @foo1.resolver +// CHECK: @foo2 = weak_odr ifunc i32 (), ptr @foo2.resolver +// CHECK: @foo3 = weak_odr ifunc i32 (), ptr @foo3.resolver +// CHECK: @foo4 = weak_odr ifunc i32 (), ptr @foo4.resolver +// CHECK: @foo5 = weak_odr ifunc i32 (), ptr @foo5.resolver +// CHECK: @foo6 = weak_odr ifunc i32 (), ptr @foo6.resolver +// CHECK: @foo7 = weak_odr ifunc i32 (), ptr @foo7.resolver +// CHECK: @foo8 = weak_odr ifunc i32 (), ptr @foo8.resolver +//. +// CHECK-LABEL: define dso_local signext i32 @foo1._v( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo1.default( +// CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo2._zbb( +// CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo2._m( +// CHECK-SAME: ) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo2.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo3._c_zbb( +// CHECK-SAME: ) #[[ATTR4:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo3._m( +// CHECK-SAME: ) #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo3.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo4._zba( +// CHECK-SAME: ) #[[ATTR5:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo4._zbb( +// 
CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo4._zba_zbb( +// CHECK-SAME: ) #[[ATTR6:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo4.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo5._zba( +// CHECK-SAME: ) #[[ATTR5]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo5._zba_zbb( +// CHECK-SAME: ) #[[ATTR6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo5._zbb( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo5.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo6._zba( +// CHECK-SAME: ) #[[ATTR5]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo6._zbb( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo6._zba_zbb( +// CHECK-SAME: ) #[[ATTR6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo6.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo7._zba( +// CHECK-SAME: ) #[[ATTR5]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo7._zbb( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo7._zba_zbb( +// CHECK-SAME: ) #[[ATTR6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret 
i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo7.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo8._zba( +// CHECK-SAME: ) #[[ATTR5]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo8._zbb( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo8._zba_zbb( +// CHECK-SAME: ) #[[ATTR6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @foo8.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local signext i32 @bar( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = call signext i32 @foo1() +// CHECK-NEXT: [[CALL1:%.*]] = call signext i32 @foo2() +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NEXT: [[CALL2:%.*]] = call signext i32 @foo3() +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]] +// CHECK-NEXT: [[CALL4:%.*]] = call signext i32 @foo4() +// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]] +// CHECK-NEXT: [[CALL6:%.*]] = call signext i32 @foo5() +// CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CALL6]] +// CHECK-NEXT: [[CALL8:%.*]] = call signext i32 @foo6() +// CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CALL8]] +// CHECK-NEXT: [[CALL10:%.*]] = call signext i32 @foo7() +// CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]] +// CHECK-NEXT: [[CALL12:%.*]] = call signext i32 @foo8() +// CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CALL12]] +// CHECK-NEXT: ret i32 [[ADD13]] +// +// +// CHECK-LABEL: define weak_odr ptr @foo1.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load 
i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 2097152 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 2097152 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @foo1._v +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @foo1.default +// +// +// CHECK-LABEL: define weak_odr ptr @foo2.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 268435456 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 268435456 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @foo2._zbb +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 4096 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 4096 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @foo2._m +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @foo2.default +// +// +// CHECK-LABEL: define weak_odr ptr @foo3.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 268435460 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 268435460 +// CHECK-NEXT: br i1 
[[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @foo3._c_zbb +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 4096 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 4096 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @foo3._m +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @foo3.default +// +// +// CHECK-LABEL: define weak_odr ptr @foo4.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 134217728 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 134217728 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @foo4._zba +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 268435456 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 268435456 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @foo4._zbb +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 402653184 +// CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 402653184 +// CHECK-NEXT: br i1 
[[TMP8]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @foo4._zba_zbb +// CHECK: resolver_else4: +// CHECK-NEXT: ret ptr @foo4.default +// +// +// CHECK-LABEL: define weak_odr ptr @foo5.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 134217728 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 134217728 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @foo5._zba +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 402653184 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 402653184 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @foo5._zba_zbb +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 268435456 +// CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 268435456 +// CHECK-NEXT: br i1 [[TMP8]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @foo5._zbb +// CHECK: resolver_else4: +// CHECK-NEXT: ret ptr @foo5.default +// +// +// CHECK-LABEL: define weak_odr ptr @foo6.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds 
({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 402653184 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 402653184 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @foo6._zba_zbb +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 134217728 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 134217728 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @foo6._zba +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 268435456 +// CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 268435456 +// CHECK-NEXT: br i1 [[TMP8]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @foo6._zbb +// CHECK: resolver_else4: +// CHECK-NEXT: ret ptr @foo6.default +// +// +// CHECK-LABEL: define weak_odr ptr @foo7.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 402653184 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 402653184 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @foo7._zba_zbb +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr 
getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 268435456 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 268435456 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @foo7._zbb +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 134217728 +// CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 134217728 +// CHECK-NEXT: br i1 [[TMP8]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @foo7._zba +// CHECK: resolver_else4: +// CHECK-NEXT: ret ptr @foo7.default +// +// +// CHECK-LABEL: define weak_odr ptr @foo8.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 402653184 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 402653184 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @foo8._zba_zbb +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 134217728 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 134217728 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @foo8._zba +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP6:%.*]] = 
load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 268435456 +// CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 268435456 +// CHECK-NEXT: br i1 [[TMP8]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @foo8._zbb +// CHECK: resolver_else4: +// CHECK-NEXT: ret ptr @foo8.default +// +//. +// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+d,+f,+i,+v,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b" } +// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+i" } +// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+i,+zbb" } +// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+i,+m,+zmmul" } +// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+c,+i,+zbb" } +// CHECK: attributes #[[ATTR5]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+i,+zba" } +// CHECK: attributes #[[ATTR6]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+i,+zba,+zbb" } +//. 
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META1:![0-9]+]] = !{i32 1, !"target-abi", !"lp64"} +// CHECK: [[META2:![0-9]+]] = !{i32 6, !"riscv-isa", [[META3:![0-9]+]]} +// CHECK: [[META3]] = !{!"rv64i2p1"} +// CHECK: [[META4:![0-9]+]] = !{i32 8, !"SmallDataLimit", i32 0} +// CHECK: [[META5:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c index 228435a0494c3e5..22a53c82bfbf9fa 100644 --- a/clang/test/CodeGen/attr-target-version.c +++ b/clang/test/CodeGen/attr-target-version.c @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs --global-value-regex ".*" // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fmv -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV @@ -296,7 +296,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_c._Mssbs -// CHECK-SAME: () #[[ATTR9]] { +// CHECK-SAME: () #[[ATTR13:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret void // @@ -354,14 +354,14 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@unused_with_forward_default_decl._Mmops -// CHECK-SAME: () #[[ATTR14:[0-9]+]] { +// CHECK-SAME: () #[[ATTR15:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 0 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define 
{{[^@]+}}@unused_with_implicit_extern_forward_default_decl._Mdotprod -// CHECK-SAME: () #[[ATTR15:[0-9]+]] { +// CHECK-SAME: () #[[ATTR16:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 0 // @@ -375,7 +375,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@unused_with_default_def._Msve -// CHECK-SAME: () #[[ATTR16:[0-9]+]] { +// CHECK-SAME: () #[[ATTR17:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 0 // @@ -410,14 +410,14 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@unused_with_implicit_forward_default_def._Mlse -// CHECK-SAME: () #[[ATTR17:[0-9]+]] { +// CHECK-SAME: () #[[ATTR18:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 1 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@unused_without_default._Mrdm -// CHECK-SAME: () #[[ATTR18:[0-9]+]] { +// CHECK-SAME: () #[[ATTR19:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 0 // @@ -431,14 +431,14 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@used_def_without_default_decl._Mjscvt -// CHECK-SAME: () #[[ATTR20:[0-9]+]] { +// CHECK-SAME: () #[[ATTR21:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 1 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@used_def_without_default_decl._Mrdm -// CHECK-SAME: () #[[ATTR18]] { +// CHECK-SAME: () #[[ATTR19]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 2 // @@ -618,7 +618,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_d._Msb -// CHECK-SAME: () #[[ATTR22:[0-9]+]] 
{ +// CHECK-SAME: () #[[ATTR23:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 0 // @@ -648,8 +648,8 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: resolver_entry: // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 281474976710656 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 281474976710656 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 562949953421312 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 562949953421312 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK: resolver_return: @@ -660,91 +660,91 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mf64mmMpmullMsha2 -// CHECK-SAME: () #[[ATTR23:[0-9]+]] { +// CHECK-SAME: () #[[ATTR24:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 1 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._MfcmaMfp16MrdmMsme -// CHECK-SAME: () #[[ATTR24:[0-9]+]] { +// CHECK-SAME: () #[[ATTR25:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 2 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mf32mmMi8mmMsha3 -// CHECK-SAME: () #[[ATTR25:[0-9]+]] { +// CHECK-SAME: () #[[ATTR26:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 12 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._MditMsve-ebf16 -// CHECK-SAME: () #[[ATTR26:[0-9]+]] { +// CHECK-SAME: () #[[ATTR27:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 8 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._MdpbMrcpc2 
-// CHECK-SAME: () #[[ATTR27:[0-9]+]] { +// CHECK-SAME: () #[[ATTR28:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 6 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mdpb2Mjscvt -// CHECK-SAME: () #[[ATTR28:[0-9]+]] { +// CHECK-SAME: () #[[ATTR29:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 7 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._MfrinttsMrcpc -// CHECK-SAME: () #[[ATTR29:[0-9]+]] { +// CHECK-SAME: () #[[ATTR30:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 3 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._MsveMsve-bf16 -// CHECK-SAME: () #[[ATTR30:[0-9]+]] { +// CHECK-SAME: () #[[ATTR31:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 4 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._Msve2-aesMsve2-sha3 -// CHECK-SAME: () #[[ATTR31:[0-9]+]] { +// CHECK-SAME: () #[[ATTR32:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 5 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._Msve2Msve2-bitpermMsve2-pmull128 -// CHECK-SAME: () #[[ATTR32:[0-9]+]] { +// CHECK-SAME: () #[[ATTR33:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 9 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mmemtag2Msve2-sm4 -// CHECK-SAME: () #[[ATTR33:[0-9]+]] { +// CHECK-SAME: () #[[ATTR34:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 10 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mmemtag3MmopsMrcpc3 -// CHECK-SAME: () #[[ATTR34:[0-9]+]] { +// CHECK-SAME: () #[[ATTR35:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 11 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._MaesMdotprod -// 
CHECK-SAME: () #[[ATTR15]] { +// CHECK-SAME: () #[[ATTR16]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 13 // @@ -758,14 +758,14 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._MfpMsm4 -// CHECK-SAME: () #[[ATTR35:[0-9]+]] { +// CHECK-SAME: () #[[ATTR36:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 15 // // // CHECK: Function Attrs: noinline nounwind optnone // CHECK-LABEL: define {{[^@]+}}@fmv_inline._MlseMrdm -// CHECK-SAME: () #[[ATTR36:[0-9]+]] { +// CHECK-SAME: () #[[ATTR37:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 16 // @@ -1112,48 +1112,6 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NOFMV-NEXT: ret i32 1 // //. -// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+flagm,+fp-armv8,+fp16fml,+fullfp16,+neon,+rand" } -// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+altnzcv,+bf16,+flagm,+sme,+sme-i16i64" } -// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+lse,+neon,+sha2" } -// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+fp-armv8,+ls64,+neon" } -// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fp16fml,+fullfp16,+neon" } -// CHECK: attributes #[[ATTR5]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" } -// CHECK: attributes #[[ATTR6]] = { noinline nounwind optnone "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-features"="+crc,+ls64" } -// CHECK: attributes #[[ATTR7]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti" } -// CHECK: attributes #[[ATTR8]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme,+sme2" } -// CHECK: attributes #[[ATTR9]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CHECK: attributes #[[ATTR10]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+ls64,+neon" } -// CHECK: attributes #[[ATTR11]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccpp" } -// CHECK: attributes #[[ATTR12]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon" } -// CHECK: attributes #[[ATTR13:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CHECK: attributes #[[ATTR14]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+mops" } -// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+fp-armv8,+neon" } -// CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve" } -// CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse" } -// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rdm" } -// CHECK: attributes #[[ATTR19:[0-9]+]] = { "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+jsconv,+neon" } -// CHECK: attributes #[[ATTR20]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+jsconv,+neon" } -// CHECK: attributes #[[ATTR21:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ls64" } -// CHECK: attributes #[[ATTR22]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sb" } -// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+f64mm,+fp-armv8,+fullfp16,+neon,+sha2,+sve" } -// CHECK: attributes #[[ATTR24]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+complxnum,+fp-armv8,+fullfp16,+neon,+rdm,+sme" } -// CHECK: attributes #[[ATTR25]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+f32mm,+fp-armv8,+fullfp16,+i8mm,+neon,+sha2,+sha3,+sve" } -// CHECK: attributes #[[ATTR26]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+dit,+fp-armv8,+fullfp16,+neon,+sve" } -// CHECK: attributes #[[ATTR27]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccpp,+rcpc" } -// CHECK: attributes #[[ATTR28]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccdp,+ccpp,+fp-armv8,+jsconv,+neon" } -// CHECK: attributes #[[ATTR29]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fptoint,+rcpc" } -// CHECK: attributes #[[ATTR30]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sve" 
} -// CHECK: attributes #[[ATTR31]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3" } -// CHECK: attributes #[[ATTR32]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm" } -// CHECK: attributes #[[ATTR33]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+mte,+neon,+sve,+sve2,+sve2-sm4" } -// CHECK: attributes #[[ATTR34]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+mops,+mte,+rcpc,+rcpc3" } -// CHECK: attributes #[[ATTR35]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sm4" } -// CHECK: attributes #[[ATTR36]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+lse,+neon,+rdm" } -// CHECK: attributes #[[ATTR37:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rdm" } -//. -// CHECK-NOFMV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } -// CHECK-NOFMV: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } -//. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. 
diff --git a/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp b/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp index 913997d53cf3e04..26760a90eb1c715 100644 --- a/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp +++ b/clang/test/CodeGenCXX/attr-target-clones-aarch64.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --global-value-regex ".*" --version 5 // RUN: %clang_cc1 -std=c++11 -triple aarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s int __attribute__((target_clones("ls64+fp16", "default"))) foo_ovl(int) { return 1; } @@ -173,8 +173,8 @@ void run_foo_tml() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36310271995674624 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36310271995674624 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36591746972385280 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36591746972385280 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: @@ -222,8 +222,8 @@ void run_foo_tml() { // CHECK-NEXT: [[RESOLVER_ENTRY:.*:]] // CHECK-NEXT: call void @__init_cpu_features_resolver() // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36310271995674624 -// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36310271995674624 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 36591746972385280 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 36591746972385280 // CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] 
// CHECK-NEXT: br i1 [[TMP3]], label %[[RESOLVER_RETURN:.*]], label %[[RESOLVER_ELSE:.*]] // CHECK: [[RESOLVER_RETURN]]: @@ -240,11 +240,6 @@ void run_foo_tml() { // CHECK-NEXT: ret ptr @_ZN7MyClassIisE7foo_tmlEv.default // //. -// CHECK: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon" } -// CHECK: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CHECK: attributes #[[ATTR2]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fptoint" } -// CHECK: attributes #[[ATTR3]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme,+sme-f64f64" } -//. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. 
diff --git a/clang/test/CodeGenCXX/attr-target-version-riscv.cpp b/clang/test/CodeGenCXX/attr-target-version-riscv.cpp new file mode 100644 index 000000000000000..9078f6541b3dcb3 --- /dev/null +++ b/clang/test/CodeGenCXX/attr-target-version-riscv.cpp @@ -0,0 +1,432 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --version 4 +// RUN: %clang_cc1 -std=c++11 -triple riscv64-linux-gnu -target-feature +i -target-feature +m -emit-llvm %s -o - | FileCheck %s + +__attribute__((target_version("arch=+v"))) int foo1(void) { return 1; } +__attribute__((target_version("default"))) int foo1(void) { return 1; } + +__attribute__((target_version("arch=+zbb"))) int foo2(void) { return 1; } +__attribute__((target_version("arch=+m"))) int foo2(void) { return 1; } +__attribute__((target_version("default"))) int foo2(void) { return 1; } + +__attribute__((target_version("arch=+zbb,+c"))) int foo3(void) { return 1; } +__attribute__((target_version("arch=+m"))) int foo3(void) { return 1; } +__attribute__((target_version("default"))) int foo3(void) { return 1; } + +__attribute__((target_version("arch=+zba"))) int foo4(void) { return 1; } +__attribute__((target_version("arch=+zbb"))) int foo4(void) { return 1; } +__attribute__((target_version("arch=+zbb,+zba"))) int foo4(void) { return 1; } +__attribute__((target_version("default"))) int foo4(void) { return 1; } + +__attribute__((target_version("arch=+zba"))) int foo5(void) { return 1; } +__attribute__((target_version("arch=+zbb,+zba"))) int foo5(void) { return 1; } +__attribute__((target_version("arch=+zbb"))) int foo5(void) { return 1; } +__attribute__((target_version("default"))) int foo5(void) { return 1; } + +__attribute__((target_version("arch=+zba"))) int foo6(void) { return 1; } +__attribute__((target_version("arch=+zbb"))) int foo6(void) { return 1; } +__attribute__((target_version("arch=+zbb,+zba;priority=10"))) int foo6(void) { return 1; } 
+__attribute__((target_version("default"))) int foo6(void) { return 1; } + +__attribute__((target_version("priority=8;arch=+zba"))) int foo7(void) { return 1; } +__attribute__((target_version("arch=+zbb;priority=9"))) int foo7(void) { return 1; } +__attribute__((target_version("arch=+zbb,+zba;priority=10"))) int foo7(void) { return 1; } +__attribute__((target_version("default"))) int foo7(void) { return 1; } + +__attribute__((target_version("priority=-1;arch=+zba"))) int foo8(void) { return 1; } +__attribute__((target_version("arch=+zbb;priority=-2"))) int foo8(void) { return 1; } +__attribute__((target_version("arch=+zbb,+zba;priority=3"))) int foo8(void) { return 1; } +__attribute__((target_version("default"))) int foo8(void) { return 1; } + +int bar() { return foo1() + foo2() + foo3(); } +//. +// CHECK: @__riscv_feature_bits = external dso_local global { i32, [2 x i64] } +// CHECK: @_Z4foo1v = weak_odr ifunc i32 (), ptr @_Z4foo1v.resolver +// CHECK: @_Z4foo2v = weak_odr ifunc i32 (), ptr @_Z4foo2v.resolver +// CHECK: @_Z4foo3v = weak_odr ifunc i32 (), ptr @_Z4foo3v.resolver +// CHECK: @_Z4foo4v = weak_odr ifunc i32 (), ptr @_Z4foo4v.resolver +// CHECK: @_Z4foo5v = weak_odr ifunc i32 (), ptr @_Z4foo5v.resolver +// CHECK: @_Z4foo6v = weak_odr ifunc i32 (), ptr @_Z4foo6v.resolver +// CHECK: @_Z4foo7v = weak_odr ifunc i32 (), ptr @_Z4foo7v.resolver +// CHECK: @_Z4foo8v = weak_odr ifunc i32 (), ptr @_Z4foo8v.resolver +//. 
+// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo1v._v( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo1v.default( +// CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo2v._zbb( +// CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo2v._m( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo2v.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo3v._c_zbb( +// CHECK-SAME: ) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo3v._m( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo3v.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo4v._zba( +// CHECK-SAME: ) #[[ATTR4:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo4v._zbb( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo4v._zba_zbb( +// CHECK-SAME: ) #[[ATTR5:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo4v.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext 
i32 @_Z4foo5v._zba( +// CHECK-SAME: ) #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo5v._zba_zbb( +// CHECK-SAME: ) #[[ATTR5]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo5v._zbb( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo5v.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo6v._zba( +// CHECK-SAME: ) #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo6v._zbb( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo6v._zba_zbb( +// CHECK-SAME: ) #[[ATTR5]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo6v.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo7v._zba( +// CHECK-SAME: ) #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo7v._zbb( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo7v._zba_zbb( +// CHECK-SAME: ) #[[ATTR5]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo7v.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo8v._zba( +// CHECK-SAME: ) #[[ATTR4]] { +// CHECK-NEXT: entry: +// 
CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo8v._zbb( +// CHECK-SAME: ) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo8v._zba_zbb( +// CHECK-SAME: ) #[[ATTR5]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z4foo8v.default( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i32 1 +// +// +// CHECK-LABEL: define dso_local noundef signext i32 @_Z3barv( +// CHECK-SAME: ) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z4foo1v() +// CHECK-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_Z4foo2v() +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] +// CHECK-NEXT: [[CALL2:%.*]] = call noundef signext i32 @_Z4foo3v() +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CALL2]] +// CHECK-NEXT: ret i32 [[ADD3]] +// +// +// CHECK-LABEL: define weak_odr ptr @_Z4foo1v.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 2097152 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 2097152 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_Z4foo1v._v +// CHECK: resolver_else: +// CHECK-NEXT: ret ptr @_Z4foo1v.default +// +// +// CHECK-LABEL: define weak_odr ptr @_Z4foo2v.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = 
and i64 [[TMP0]], 268435456 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 268435456 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_Z4foo2v._zbb +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 4096 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 4096 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @_Z4foo2v._m +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @_Z4foo2v.default +// +// +// CHECK-LABEL: define weak_odr ptr @_Z4foo3v.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 268435460 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 268435460 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_Z4foo3v._c_zbb +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 4096 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 4096 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @_Z4foo3v._m +// CHECK: resolver_else2: +// CHECK-NEXT: ret ptr @_Z4foo3v.default +// +// +// CHECK-LABEL: define weak_odr ptr @_Z4foo4v.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call 
void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 134217728 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 134217728 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_Z4foo4v._zba +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 268435456 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 268435456 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @_Z4foo4v._zbb +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 402653184 +// CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 402653184 +// CHECK-NEXT: br i1 [[TMP8]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @_Z4foo4v._zba_zbb +// CHECK: resolver_else4: +// CHECK-NEXT: ret ptr @_Z4foo4v.default +// +// +// CHECK-LABEL: define weak_odr ptr @_Z4foo5v.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 134217728 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 134217728 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: 
resolver_return: +// CHECK-NEXT: ret ptr @_Z4foo5v._zba +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 402653184 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 402653184 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @_Z4foo5v._zba_zbb +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 268435456 +// CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 268435456 +// CHECK-NEXT: br i1 [[TMP8]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @_Z4foo5v._zbb +// CHECK: resolver_else4: +// CHECK-NEXT: ret ptr @_Z4foo5v.default +// +// +// CHECK-LABEL: define weak_odr ptr @_Z4foo6v.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 402653184 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 402653184 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_Z4foo6v._zba_zbb +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 134217728 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 134217728 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], 
label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @_Z4foo6v._zba +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 268435456 +// CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 268435456 +// CHECK-NEXT: br i1 [[TMP8]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @_Z4foo6v._zbb +// CHECK: resolver_else4: +// CHECK-NEXT: ret ptr @_Z4foo6v.default +// +// +// CHECK-LABEL: define weak_odr ptr @_Z4foo7v.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 402653184 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 402653184 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_Z4foo7v._zba_zbb +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 268435456 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 268435456 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @_Z4foo7v._zbb +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 134217728 +// CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 134217728 +// CHECK-NEXT: br i1 
[[TMP8]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @_Z4foo7v._zba +// CHECK: resolver_else4: +// CHECK-NEXT: ret ptr @_Z4foo7v.default +// +// +// CHECK-LABEL: define weak_odr ptr @_Z4foo8v.resolver() comdat { +// CHECK-NEXT: resolver_entry: +// CHECK-NEXT: call void @__init_riscv_feature_bits(ptr null) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 402653184 +// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 402653184 +// CHECK-NEXT: br i1 [[TMP2]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK: resolver_return: +// CHECK-NEXT: ret ptr @_Z4foo8v._zba_zbb +// CHECK: resolver_else: +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 134217728 +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 134217728 +// CHECK-NEXT: br i1 [[TMP5]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK: resolver_return1: +// CHECK-NEXT: ret ptr @_Z4foo8v._zba +// CHECK: resolver_else2: +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr getelementptr inbounds ({ i32, [2 x i64] }, ptr @__riscv_feature_bits, i32 0, i32 1, i32 0), align 8 +// CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 268435456 +// CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 268435456 +// CHECK-NEXT: br i1 [[TMP8]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]] +// CHECK: resolver_return3: +// CHECK-NEXT: ret ptr @_Z4foo8v._zbb +// CHECK: resolver_else4: +// CHECK-NEXT: ret ptr @_Z4foo8v.default +// +//. 
+// CHECK: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+d,+f,+i,+m,+v,+zicsr,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b" } +// CHECK: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+i,+m,+zmmul" } +// CHECK: attributes #[[ATTR2]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+i,+m,+zbb,+zmmul" } +// CHECK: attributes #[[ATTR3]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+c,+i,+m,+zbb,+zmmul" } +// CHECK: attributes #[[ATTR4]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+i,+m,+zba,+zmmul" } +// CHECK: attributes #[[ATTR5]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+64bit,+i,+m,+zba,+zbb,+zmmul" } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META1:![0-9]+]] = !{i32 1, !"target-abi", !"lp64"} +// CHECK: [[META2:![0-9]+]] = !{i32 6, !"riscv-isa", [[META3:![0-9]+]]} +// CHECK: [[META3]] = !{!"rv64i2p1_m2p0_zmmul1p0"} +// CHECK: [[META4:![0-9]+]] = !{i32 8, !"SmallDataLimit", i32 0} +// CHECK: [[META5:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. 
diff --git a/clang/test/CodeGenCXX/attr-target-version.cpp b/clang/test/CodeGenCXX/attr-target-version.cpp index 6661abead20c6db..38eebc20de12b45 100644 --- a/clang/test/CodeGenCXX/attr-target-version.cpp +++ b/clang/test/CodeGenCXX/attr-target-version.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --global-value-regex ".*" --version 5 // RUN: %clang_cc1 -std=c++11 -triple aarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s int __attribute__((target_version("sme-f64f64+bf16"))) foo(int) { return 1; } @@ -323,19 +323,6 @@ int bar() { // CHECK-NEXT: ret ptr @_ZN7MyClass40unused_with_implicit_forward_default_defEv.default // //. -// CHECK: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme,+sme-f64f64" } -// CHECK: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CHECK: attributes #[[ATTR2]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+neon,+sm4" } -// CHECK: attributes #[[ATTR3]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc" } -// CHECK: attributes #[[ATTR4]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+fp-armv8,+neon" } -// CHECK: attributes #[[ATTR5]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+mops" } -// CHECK: attributes #[[ATTR6]] = { mustprogress noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" } -// CHECK: attributes #[[ATTR7]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve" } -// CHECK: attributes #[[ATTR8]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon" } -// CHECK: attributes #[[ATTR9]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse" } -// CHECK: attributes #[[ATTR10]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rdm" } -// CHECK: attributes #[[ATTR11:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -//. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. diff --git a/clang/test/CodeGenCXX/fmv-namespace.cpp b/clang/test/CodeGenCXX/fmv-namespace.cpp index 1ac88e68a3a12dd..c2346bdc1d68b2c 100644 --- a/clang/test/CodeGenCXX/fmv-namespace.cpp +++ b/clang/test/CodeGenCXX/fmv-namespace.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --version 5 +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --global-value-regex ".*" --version 5 // RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s namespace Name { @@ -100,11 +100,6 @@ __attribute((target_version("mops"))) int bar() { return 1; } // CHECK-NEXT: ret ptr @_ZN3Foo3barEv.default // //. 
-// CHECK: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve" } -// CHECK: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CHECK: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve" } -// CHECK: attributes #[[ATTR3]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+mops" } -//. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} //. diff --git a/clang/test/Modules/embed-files-compressed.cpp b/clang/test/Modules/embed-files-compressed.cpp index 873b3082a2fdfa7..aca9983ff160b6d 100644 --- a/clang/test/Modules/embed-files-compressed.cpp +++ b/clang/test/Modules/embed-files-compressed.cpp @@ -17,7 +17,7 @@ // RUN: %clang_cc1 -fmodules -I%t -fmodules-cache-path=%t -fmodule-name=a -emit-module %t/modulemap -fmodules-embed-all-files -o %t/a.pcm // // The above embeds ~4.5MB of highly-predictable /s and \ns into the pcm file. 
-// Check that the resulting file is under 60KB: +// Check that the resulting file is under 80KB: // // RUN: wc -c %t/a.pcm | FileCheck --check-prefix=CHECK-SIZE %s -// CHECK-SIZE: {{(^|[^0-9])[1-5][0-9][0-9][0-9][0-9]($|[^0-9])}} +// CHECK-SIZE: {{(^|[^0-9])[1-7][0-9][0-9][0-9][0-9]($|[^0-9])}} diff --git a/clang/test/Modules/gh110401.cppm b/clang/test/Modules/gh110401.cppm new file mode 100644 index 000000000000000..6b335eb5ba9d556 --- /dev/null +++ b/clang/test/Modules/gh110401.cppm @@ -0,0 +1,44 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux -emit-module-interface %t/a.cppm -o %t/A.pcm +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux -emit-module-interface -fprebuilt-module-path=%t %t/b.cppm -o %t/B.pcm + +// Just check that this doesn't crash. + +//--- a.cppm +module; + +template +void __do_visit(_Visitor &&__visitor) { + using _V0 = int; + [](_V0 __v) -> _V0 { return __v; } (1); +} + +export module A; + +void g() { + struct Visitor { }; + __do_visit(Visitor()); +} + +//--- b.cppm +module; + +template +void __do_visit(_Visitor &&__visitor) { + using _V0 = int; + + // Check that we instantiate this lambda's call operator in 'f' below + // instead of the one in 'a.cppm' here; otherwise, we won't find a + // corresponding instantiation of the using declaration above. 
+ [](_V0 __v) -> _V0 { return __v; } (1); +} + +export module B; +import A; + +void f() { + __do_visit(1); +} diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c index c26f8346cd74865..80ce90f693efbc4 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -6,22 +6,22 @@ void test_features(uint32_t slice, svfloat16x2_t zn2, svfloat16x4_t zn4, svbfloat16x2_t bzn2, svbfloat16x4_t bzn4) __arm_streaming __arm_inout("za") { - // expected-error@+1 {{'svadd_za16_f16_vg1x2' needs target feature sme-f16f16|sme-f8f16}} + // expected-error@+1 {{'svadd_za16_f16_vg1x2' needs target feature sme,(sme-f16f16|sme-f8f16)}} svadd_za16_f16_vg1x2(slice, zn2); - // expected-error@+1 {{'svadd_za16_f16_vg1x4' needs target feature sme-f16f16|sme-f8f16}} + // expected-error@+1 {{'svadd_za16_f16_vg1x4' needs target feature sme,(sme-f16f16|sme-f8f16)}} svadd_za16_f16_vg1x4(slice, zn4); - // expected-error@+1 {{'svsub_za16_f16_vg1x2' needs target feature sme-f16f16|sme-f8f16}} + // expected-error@+1 {{'svsub_za16_f16_vg1x2' needs target feature sme,(sme-f16f16|sme-f8f16)}} svsub_za16_f16_vg1x2(slice, zn2); - // expected-error@+1 {{'svsub_za16_f16_vg1x4' needs target feature sme-f16f16|sme-f8f16}} + // expected-error@+1 {{'svsub_za16_f16_vg1x4' needs target feature sme,(sme-f16f16|sme-f8f16)}} svsub_za16_f16_vg1x4(slice, zn4); - // expected-error@+1 {{'svadd_za16_bf16_vg1x2' needs target feature sme-b16b16}} + // expected-error@+1 {{'svadd_za16_bf16_vg1x2' needs target feature sme,sme-b16b16}} svadd_za16_bf16_vg1x2(slice, bzn2); - // expected-error@+1 {{'svadd_za16_bf16_vg1x4' needs target feature sme-b16b16}} + // expected-error@+1 {{'svadd_za16_bf16_vg1x4' needs target feature sme,sme-b16b16}} svadd_za16_bf16_vg1x4(slice, bzn4); - // expected-error@+1 {{'svsub_za16_bf16_vg1x2' needs target feature 
sme-b16b16}} + // expected-error@+1 {{'svsub_za16_bf16_vg1x2' needs target feature sme,sme-b16b16}} svsub_za16_bf16_vg1x2(slice, bzn2); - // expected-error@+1 {{'svsub_za16_bf16_vg1x4' needs target feature sme-b16b16}} + // expected-error@+1 {{'svsub_za16_bf16_vg1x4' needs target feature sme,sme-b16b16}} svsub_za16_bf16_vg1x4(slice, bzn4); } diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_b16b16.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_b16b16.cpp index b93f348557c0144..6c170cd776988ee 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_b16b16.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_b16b16.cpp @@ -6,45 +6,45 @@ void test_b16b16( svbfloat16_t bf16, svbfloat16x2_t bf16x2, svbfloat16x4_t bf16x4) __arm_streaming { - // expected-error@+1 {{'svclamp_single_bf16_x2' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svclamp_single_bf16_x2' needs target feature sme,sme2,sve-b16b16}} svclamp_single_bf16_x2(bf16x2, bf16, bf16); - // expected-error@+1 {{'svclamp_single_bf16_x4' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svclamp_single_bf16_x4' needs target feature sme,sme2,sve-b16b16}} svclamp_single_bf16_x4(bf16x4, bf16, bf16); - // expected-error@+1 {{'svmax_single_bf16_x2' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svmax_single_bf16_x2' needs target feature sme,sme2,sve-b16b16}} svmax_single_bf16_x2(bf16x2, bf16); - // expected-error@+1 {{'svmax_single_bf16_x4' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svmax_single_bf16_x4' needs target feature sme,sme2,sve-b16b16}} svmax_single_bf16_x4(bf16x4, bf16); - // expected-error@+1 {{'svmax_bf16_x2' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svmax_bf16_x2' needs target feature sme,sme2,sve-b16b16}} svmax_bf16_x2(bf16x2, bf16x2); - // expected-error@+1 {{'svmax_bf16_x4' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svmax_bf16_x4' needs target feature 
sme,sme2,sve-b16b16}} svmax_bf16_x4(bf16x4, bf16x4); - // expected-error@+1 {{'svmaxnm_single_bf16_x2' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svmaxnm_single_bf16_x2' needs target feature sme,sme2,sve-b16b16}} svmaxnm_single_bf16_x2(bf16x2, bf16); - // expected-error@+1 {{'svmaxnm_single_bf16_x4' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svmaxnm_single_bf16_x4' needs target feature sme,sme2,sve-b16b16}} svmaxnm_single_bf16_x4(bf16x4, bf16); - // expected-error@+1 {{'svmaxnm_bf16_x2' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svmaxnm_bf16_x2' needs target feature sme,sme2,sve-b16b16}} svmaxnm_bf16_x2(bf16x2, bf16x2); - // expected-error@+1 {{'svmaxnm_bf16_x4' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svmaxnm_bf16_x4' needs target feature sme,sme2,sve-b16b16}} svmaxnm_bf16_x4(bf16x4, bf16x4); - // expected-error@+1 {{'svmin_single_bf16_x2' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svmin_single_bf16_x2' needs target feature sme,sme2,sve-b16b16}} svmin_single_bf16_x2(bf16x2, bf16); - // expected-error@+1 {{'svmin_single_bf16_x4' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svmin_single_bf16_x4' needs target feature sme,sme2,sve-b16b16}} svmin_single_bf16_x4(bf16x4, bf16); - // expected-error@+1 {{'svmin_bf16_x2' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svmin_bf16_x2' needs target feature sme,sme2,sve-b16b16}} svmin_bf16_x2(bf16x2, bf16x2); - // expected-error@+1 {{'svmin_bf16_x4' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svmin_bf16_x4' needs target feature sme,sme2,sve-b16b16}} svmin_bf16_x4(bf16x4, bf16x4); - // expected-error@+1 {{'svminnm_single_bf16_x2' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svminnm_single_bf16_x2' needs target feature sme,sme2,sve-b16b16}} svminnm_single_bf16_x2(bf16x2, bf16); - // expected-error@+1 {{'svminnm_single_bf16_x4' needs target 
feature sme2,sve-b16b16}} + // expected-error@+1 {{'svminnm_single_bf16_x4' needs target feature sme,sme2,sve-b16b16}} svminnm_single_bf16_x4(bf16x4, bf16); - // expected-error@+1 {{'svminnm_bf16_x2' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svminnm_bf16_x2' needs target feature sme,sme2,sve-b16b16}} svminnm_bf16_x2(bf16x2, bf16x2); - // expected-error@+1 {{'svminnm_bf16_x4' needs target feature sme2,sve-b16b16}} + // expected-error@+1 {{'svminnm_bf16_x4' needs target feature sme,sme2,sve-b16b16}} svminnm_bf16_x4(bf16x4, bf16x4); -} \ No newline at end of file +} diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fmlas16.c b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fmlas16.c index e60f6a483c024f7..bfbcac20b4f000e 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fmlas16.c +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fmlas16.c @@ -14,54 +14,54 @@ void test_features_f16f16(uint32_t slice, svbfloat16x4_t bzn4, svbfloat16x4_t bzm4) __arm_streaming __arm_inout("za") { - // expected-error@+1 {{'svmla_single_za16_f16_vg1x2' needs target feature sme-f16f16}} + // expected-error@+1 {{'svmla_single_za16_f16_vg1x2' needs target feature sme,sme-f16f16}} svmla_single_za16_f16_vg1x2(slice, zn2, zm); - // expected-error@+1 {{'svmla_single_za16_f16_vg1x4' needs target feature sme-f16f16}} + // expected-error@+1 {{'svmla_single_za16_f16_vg1x4' needs target feature sme,sme-f16f16}} svmla_single_za16_f16_vg1x4(slice, zn4, zm); - // expected-error@+1 {{'svmls_single_za16_f16_vg1x2' needs target feature sme-f16f16}} + // expected-error@+1 {{'svmls_single_za16_f16_vg1x2' needs target feature sme,sme-f16f16}} svmls_single_za16_f16_vg1x2(slice, zn2, zm); - // expected-error@+1 {{'svmls_single_za16_f16_vg1x4' needs target feature sme-f16f16}} + // expected-error@+1 {{'svmls_single_za16_f16_vg1x4' needs target feature sme,sme-f16f16}} svmls_single_za16_f16_vg1x4(slice, zn4, zm); - // expected-error@+1 
{{'svmla_za16_f16_vg1x2' needs target feature sme-f16f16}} + // expected-error@+1 {{'svmla_za16_f16_vg1x2' needs target feature sme,sme-f16f16}} svmla_za16_f16_vg1x2(slice, zn2, zm2); - // expected-error@+1 {{'svmla_za16_f16_vg1x4' needs target feature sme-f16f16}} + // expected-error@+1 {{'svmla_za16_f16_vg1x4' needs target feature sme,sme-f16f16}} svmla_za16_f16_vg1x4(slice, zn4, zm4); - // expected-error@+1 {{'svmls_za16_f16_vg1x2' needs target feature sme-f16f16}} + // expected-error@+1 {{'svmls_za16_f16_vg1x2' needs target feature sme,sme-f16f16}} svmls_za16_f16_vg1x2(slice, zn2, zm2); - // expected-error@+1 {{'svmls_za16_f16_vg1x4' needs target feature sme-f16f16}} + // expected-error@+1 {{'svmls_za16_f16_vg1x4' needs target feature sme,sme-f16f16}} svmls_za16_f16_vg1x4(slice, zn4, zm4); - // expected-error@+1 {{'svmla_lane_za16_f16_vg1x2' needs target feature sme-f16f16}} + // expected-error@+1 {{'svmla_lane_za16_f16_vg1x2' needs target feature sme,sme-f16f16}} svmla_lane_za16_f16_vg1x2(slice, zn2, zm, 7); - // expected-error@+1 {{'svmla_lane_za16_f16_vg1x4' needs target feature sme-f16f16}} + // expected-error@+1 {{'svmla_lane_za16_f16_vg1x4' needs target feature sme,sme-f16f16}} svmla_lane_za16_f16_vg1x4(slice, zn4, zm, 7); - // expected-error@+1 {{'svmls_lane_za16_f16_vg1x2' needs target feature sme-f16f16}} + // expected-error@+1 {{'svmls_lane_za16_f16_vg1x2' needs target feature sme,sme-f16f16}} svmls_lane_za16_f16_vg1x2(slice, zn2, zm, 7); - // expected-error@+1 {{'svmls_lane_za16_f16_vg1x4' needs target feature sme-f16f16}} + // expected-error@+1 {{'svmls_lane_za16_f16_vg1x4' needs target feature sme,sme-f16f16}} svmls_lane_za16_f16_vg1x4(slice, zn4, zm, 7); - // expected-error@+1 {{'svmla_single_za16_bf16_vg1x2' needs target feature sme-b16b16}} + // expected-error@+1 {{'svmla_single_za16_bf16_vg1x2' needs target feature sme,sme-b16b16}} svmla_single_za16_bf16_vg1x2(slice, bzn2, bzm); - // expected-error@+1 {{'svmla_single_za16_bf16_vg1x4' needs 
target feature sme-b16b16}} + // expected-error@+1 {{'svmla_single_za16_bf16_vg1x4' needs target feature sme,sme-b16b16}} svmla_single_za16_bf16_vg1x4(slice, bzn4, bzm); - // expected-error@+1 {{'svmls_single_za16_bf16_vg1x2' needs target feature sme-b16b16}} + // expected-error@+1 {{'svmls_single_za16_bf16_vg1x2' needs target feature sme,sme-b16b16}} svmls_single_za16_bf16_vg1x2(slice, bzn2, bzm); - // expected-error@+1 {{'svmls_single_za16_bf16_vg1x4' needs target feature sme-b16b16}} + // expected-error@+1 {{'svmls_single_za16_bf16_vg1x4' needs target feature sme,sme-b16b16}} svmls_single_za16_bf16_vg1x4(slice, bzn4, bzm); - // expected-error@+1 {{'svmla_za16_bf16_vg1x2' needs target feature sme-b16b16}} + // expected-error@+1 {{'svmla_za16_bf16_vg1x2' needs target feature sme,sme-b16b16}} svmla_za16_bf16_vg1x2(slice, bzn2, bzm2); - // expected-error@+1 {{'svmla_za16_bf16_vg1x4' needs target feature sme-b16b16}} + // expected-error@+1 {{'svmla_za16_bf16_vg1x4' needs target feature sme,sme-b16b16}} svmla_za16_bf16_vg1x4(slice, bzn4, bzm4); - // expected-error@+1 {{'svmls_za16_bf16_vg1x2' needs target feature sme-b16b16}} + // expected-error@+1 {{'svmls_za16_bf16_vg1x2' needs target feature sme,sme-b16b16}} svmls_za16_bf16_vg1x2(slice, bzn2, bzm2); - // expected-error@+1 {{'svmls_za16_bf16_vg1x4' needs target feature sme-b16b16}} + // expected-error@+1 {{'svmls_za16_bf16_vg1x4' needs target feature sme,sme-b16b16}} svmls_za16_bf16_vg1x4(slice, bzn4, bzm4); - // expected-error@+1 {{'svmla_lane_za16_bf16_vg1x2' needs target feature sme-b16b16}} + // expected-error@+1 {{'svmla_lane_za16_bf16_vg1x2' needs target feature sme,sme-b16b16}} svmla_lane_za16_bf16_vg1x2(slice, bzn2, bzm, 7); - // expected-error@+1 {{'svmla_lane_za16_bf16_vg1x4' needs target feature sme-b16b16}} + // expected-error@+1 {{'svmla_lane_za16_bf16_vg1x4' needs target feature sme,sme-b16b16}} svmla_lane_za16_bf16_vg1x4(slice, bzn4, bzm, 7); - // expected-error@+1 {{'svmls_lane_za16_bf16_vg1x2' needs 
target feature sme-b16b16}} + // expected-error@+1 {{'svmls_lane_za16_bf16_vg1x2' needs target feature sme,sme-b16b16}} svmls_lane_za16_bf16_vg1x2(slice, bzn2, bzm, 7); - // expected-error@+1 {{'svmls_lane_za16_bf16_vg1x4' needs target feature sme-b16b16}} + // expected-error@+1 {{'svmls_lane_za16_bf16_vg1x4' needs target feature sme,sme-b16b16}} svmls_lane_za16_bf16_vg1x4(slice, bzn4, bzm, 7); } diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c index 881c2f98e1e0b68..301f785327dc6c1 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -8,13 +8,13 @@ void test_features(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm, svbfloat16_t znb, svbfloat16_t zmb) __arm_streaming __arm_inout("za") { -// expected-error@+1 {{'svmopa_za16_bf16_m' needs target feature sme-b16b16}} +// expected-error@+1 {{'svmopa_za16_bf16_m' needs target feature sme,sme-b16b16}} svmopa_za16_bf16_m(0, pn, pm, znb, zmb); -// expected-error@+1 {{'svmops_za16_bf16_m' needs target feature sme-b16b16}} +// expected-error@+1 {{'svmops_za16_bf16_m' needs target feature sme,sme-b16b16}} svmops_za16_bf16_m(0, pn, pm, znb, zmb); -// expected-error@+1 {{'svmopa_za16_f16_m' needs target feature sme-f16f16}} +// expected-error@+1 {{'svmopa_za16_f16_m' needs target feature sme,sme-f16f16}} svmopa_za16_f16_m(0, pn, pm, zn, zm); -// expected-error@+1 {{'svmops_za16_f16_m' needs target feature sme-f16f16}} +// expected-error@+1 {{'svmops_za16_f16_m' needs target feature sme,sme-f16f16}} svmops_za16_f16_m(0, pn, pm, zn, zm); } diff --git a/clang/test/Sema/aarch64-streaming-sme-or-nonstreaming-sve-builtins.c b/clang/test/Sema/aarch64-streaming-sme-or-nonstreaming-sve-builtins.c index 45776eb13e4fbc8..792d79ee3e600d6 100644 --- a/clang/test/Sema/aarch64-streaming-sme-or-nonstreaming-sve-builtins.c +++ 
b/clang/test/Sema/aarch64-streaming-sme-or-nonstreaming-sve-builtins.c @@ -38,6 +38,12 @@ svfloat32_t good6(svfloat32_t a, svfloat32_t b, svfloat32_t c) __arm_streaming_c return svclamp(a, b, c); } +// Test that the +sve-b16b16 is not considered an SVE flag (it applies to both) +__attribute__((target("+sme2,+sve2,+sve-b16b16"))) +svbfloat16_t good7(svbfloat16_t a, svbfloat16_t b, svbfloat16_t c) __arm_streaming { + return svclamp_bf16(a, b, c); +} + // Without '+sme2', the builtin is only valid in non-streaming mode. __attribute__((target("+sve2p1,+sme"))) svfloat32_t bad1(svfloat32_t a, svfloat32_t b, svfloat32_t c) __arm_streaming { diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2.cpp b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2.cpp index 8d32b5265b00313..4229f6c30ba7940 100644 --- a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2.cpp +++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2.cpp @@ -37,4949 +37,4949 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr, const int64_t *const_i64_ptr, const uint64_t *const_u64_ptr, const float16_t *const_f16_ptr, const float32_t *const_f32_ptr, const float64_t *const_f64_ptr) { - // expected-error@+2 {{'svhistseg_s8' needs target feature sve2}} - // overload-error@+1 {{'svhistseg' needs target feature sve2}} + // expected-error@+2 {{'svhistseg_s8' needs target feature sve,sve2}} + // overload-error@+1 {{'svhistseg' needs target feature sve,sve2}} SVE_ACLE_FUNC(svhistseg,_s8,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqrdmulh_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmulh_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmulh,_s8,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqrdmulh_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmulh' needs 
target feature sve2|sme}} + // expected-error@+2 {{'svqrdmulh_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmulh,_n_s8,,)(svundef_s8(), i8); - // expected-error@+2 {{'svqdmulh_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmulh_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmulh,_s8,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqdmulh_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmulh_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmulh,_n_s8,,)(svundef_s8(), i8); - // expected-error@+2 {{'svsra_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svsra' needs target feature sve2|sme}} + // expected-error@+2 {{'svsra_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsra' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsra,_n_s8,,)(svundef_s8(), svundef_s8(), 1); - // expected-error@+2 {{'svnbsl_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svnbsl_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_n_s8,,)(svundef_s8(), svundef_s8(), i8); - // expected-error@+2 
{{'svqabs_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqabs_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqabs_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqabs_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqabs,_s8,_z,)(pg, svundef_s8()); - // expected-error@+2 {{'svqabs_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqabs_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqabs_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqabs_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqabs,_s8,_m,)(svundef_s8(), pg, svundef_s8()); - // expected-error@+2 {{'svqabs_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqabs_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqabs_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqabs_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqabs,_s8,_x,)(pg, svundef_s8()); - // expected-error@+2 {{'svcadd_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svcadd' needs target feature sve2|sme}} + // expected-error@+2 {{'svcadd_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcadd' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcadd,_s8,,)(svundef_s8(), svundef_s8(), 90); - // expected-error@+2 {{'svtbl2_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbl2_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbl2,_s8,,)(svundef2_s8(), svundef_u8()); - // expected-error@+2 {{'svhsubr_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svhsubr,_s8,_z,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svhsubr_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svhsubr_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svhsubr_n_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_s8,_z,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svhsubr_n_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_s8,_m,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svhsubr_n_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_s8,_x,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'sveortb_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // 
expected-error@+2 {{'sveortb_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'sveortb_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_n_s8,,)(svundef_s8(), svundef_s8(), i8); - // expected-error@+2 {{'svbcax_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svbcax_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_n_s8,,)(svundef_s8(), svundef_s8(), i8); - // expected-error@+2 {{'svqshlu_n_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshlu_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshlu_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshlu_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshlu,_n_s8,_z,)(pg, svundef_s8(), 1); - // expected-error@+2 {{'svqrshl_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_s8,_z,)(pg, svundef_s8(), svundef_s8()); - // 
expected-error@+2 {{'svqrshl_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqrshl_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqrshl_n_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_s8,_z,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqrshl_n_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_s8,_m,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqrshl_n_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_s8,_x,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svcmla_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svcmla' needs target feature sve2|sme}} + // expected-error@+2 {{'svcmla_s8' needs target feature (sve,sve2)|sme}} + // 
overload-error@+1 {{'svcmla' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcmla,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8(), 90); - // expected-error@+2 {{'svqsubr_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_s8,_z,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqsubr_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqsubr_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqsubr_n_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_s8,_z,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqsubr_n_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_s8,_m,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqsubr_n_s8_x' needs target feature sve2|sme}} - 
// overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_s8,_x,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svrshr_n_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshr_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshr,_n_s8,_z,)(pg, svundef_s8(), 1); - // expected-error@+2 {{'svaddp_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svaddp_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqadd_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqadd_n_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svqadd,_n_s8,_m,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqadd_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_s8,_z,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqadd_n_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_s8,_z,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqadd_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqadd_n_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_s8,_x,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svtbx_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbx' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbx_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbx' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbx,_s8,,)(svundef_s8(), svundef_s8(), svundef_u8()); - // expected-error@+2 {{'svqrdcmlah_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdcmlah' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdcmlah_s8' needs 
target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdcmlah' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdcmlah,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8(), 90); - // expected-error@+2 {{'svminp_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svminp_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqsub_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_s8,_z,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqsub_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqsub_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqsub_n_s8_z' needs 
target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_s8,_z,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqsub_n_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_s8,_m,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqsub_n_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_s8,_x,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svrsra_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsra' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsra_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsra' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsra,_n_s8,,)(svundef_s8(), svundef_s8(), 1); - // expected-error@+2 {{'sveor3_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'sveor3_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(sveor3,_n_s8,,)(svundef_s8(), svundef_s8(), i8); - // expected-error@+2 {{'svhadd_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svhadd_n_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_s8,_m,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svhadd_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_s8,_z,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svhadd_n_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_s8,_z,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svhadd_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svhadd_n_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 
{{'svhadd_n_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_s8,_x,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqrdmlsh_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlsh_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlsh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlsh,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqrdmlsh_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlsh_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlsh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlsh,_n_s8,,)(svundef_s8(), svundef_s8(), i8); - // expected-error@+2 {{'svmaxp_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svmaxp_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svmatch_s8' needs target feature sve2}} - // overload-error@+1 {{'svmatch' needs target feature sve2}} + // expected-error@+2 {{'svmatch_s8' needs target feature sve,sve2}} + // overload-error@+1 {{'svmatch' needs target feature sve,sve2}} SVE_ACLE_FUNC(svmatch,_s8,,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svwhilerw_s8' needs 
target feature sve2|sme}} - // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilerw_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilerw,_s8,,)(const_i8_ptr, const_i8_ptr); - // expected-error@+2 {{'svqcadd_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svqcadd' needs target feature sve2|sme}} + // expected-error@+2 {{'svqcadd_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqcadd' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqcadd,_s8,,)(svundef_s8(), svundef_s8(), 90); - // expected-error@+2 {{'svrhadd_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svrhadd_n_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_s8,_m,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svrhadd_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_s8,_z,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svrhadd_n_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target 
feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_s8,_z,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svrhadd_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svrhadd_n_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_s8,_x,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svwhilewr_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilewr_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilewr,_s8,,)(const_i8_ptr, const_i8_ptr); - // expected-error@+2 {{'svsli_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svsli' needs target feature sve2|sme}} + // expected-error@+2 {{'svsli_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsli' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsli,_n_s8,,)(svundef_s8(), svundef_s8(), 1); - // expected-error@+2 {{'svnmatch_s8' needs target feature sve2}} - // overload-error@+1 {{'svnmatch' needs target feature sve2}} + // expected-error@+2 {{'svnmatch_s8' needs target feature sve,sve2}} + // overload-error@+1 {{'svnmatch' needs target feature sve,sve2}} SVE_ACLE_FUNC(svnmatch,_s8,,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svaba_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_s8' needs 
target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svaba_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_n_s8,,)(svundef_s8(), svundef_s8(), i8); - // expected-error@+2 {{'svuqadd_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_s8,_m,)(pg, svundef_s8(), svundef_u8()); - // expected-error@+2 {{'svuqadd_n_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_n_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_n_s8,_m,)(pg, svundef_s8(), u8); - // expected-error@+2 {{'svuqadd_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_s8,_z,)(pg, svundef_s8(), svundef_u8()); - // expected-error@+2 {{'svuqadd_n_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_n_s8,_z,)(pg, svundef_s8(), u8); - // expected-error@+2 {{'svuqadd_s8_x' needs target 
feature sve2|sme}} - // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_s8,_x,)(pg, svundef_s8(), svundef_u8()); - // expected-error@+2 {{'svuqadd_n_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_n_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_n_s8,_x,)(pg, svundef_s8(), u8); - // expected-error@+2 {{'sveorbt_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'sveorbt_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_n_s8,,)(svundef_s8(), svundef_s8(), i8); - // expected-error@+2 {{'svbsl_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svbsl_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature 
(sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_n_s8,,)(svundef_s8(), svundef_s8(), i8); - // expected-error@+2 {{'svhsub_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_s8,_z,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svhsub_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svhsub_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svhsub_n_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_s8,_z,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svhsub_n_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_s8,_m,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svhsub_n_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 
{{'svhsub_n_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_s8,_x,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqrdmlah_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlah_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlah' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlah,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqrdmlah_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlah_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlah' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlah,_n_s8,,)(svundef_s8(), svundef_s8(), i8); - // expected-error@+2 {{'svbsl2n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svbsl2n_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_n_s8,,)(svundef_s8(), svundef_s8(), i8); - // expected-error@+2 {{'svsri_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svsri' needs target feature sve2|sme}} + // expected-error@+2 {{'svsri_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsri' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsri,_n_s8,,)(svundef_s8(), svundef_s8(), 1); - // expected-error@+2 
{{'svbsl1n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svbsl1n_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_n_s8,,)(svundef_s8(), svundef_s8(), i8); - // expected-error@+2 {{'svrshl_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_s8,_z,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svrshl_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svrshl_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svrshl_n_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 
{{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_s8,_z,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svrshl_n_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_s8,_m,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svrshl_n_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_s8,_x,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqneg_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqneg_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqneg_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqneg_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqneg,_s8,_z,)(pg, svundef_s8()); - // expected-error@+2 {{'svqneg_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqneg_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqneg_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqneg_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqneg,_s8,_m,)(svundef_s8(), pg, svundef_s8()); - // expected-error@+2 {{'svqneg_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqneg_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqneg_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqneg_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqneg,_s8,_x,)(pg, svundef_s8()); - // expected-error@+2 {{'svxar_n_s8' needs target feature sve2|sme}} - // overload-error@+1 {{'svxar' needs target feature sve2|sme}} + // expected-error@+2 
{{'svxar_n_s8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svxar' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svxar,_n_s8,,)(svundef_s8(), svundef_s8(), 1); - // expected-error@+2 {{'svqshl_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_s8,_z,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqshl_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_s8,_m,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqshl_s8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_s8,_x,)(pg, svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqshl_n_s8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_s8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_s8,_z,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqshl_n_s8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_s8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_s8,_m,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svqshl_n_s8_x' needs target 
feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_s8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_s8,_x,)(pg, svundef_s8(), i8); - // expected-error@+2 {{'svmullb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svmullb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svqrshrunb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshrunb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshrunb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshrunb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshrunb,_n_s16,,)(svundef_s16(), 1); - // expected-error@+2 {{'svqdmlalbt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalbt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalbt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqdmlalbt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalbt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalbt' needs target 
feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalbt,_n_s16,,)(svundef_s16(), svundef_s8(), i8); - // expected-error@+2 {{'svqrdmulh_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmulh_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmulh,_s16,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqrdmulh_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmulh_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmulh,_n_s16,,)(svundef_s16(), i16); - // expected-error@+2 {{'svqrdmulh_lane_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmulh_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmulh_lane_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmulh_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmulh_lane,_s16,,)(svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svaddwb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwb,_s16,,)(svundef_s16(), svundef_s8()); - // expected-error@+2 {{'svaddwb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwb,_n_s16,,)(svundef_s16(), i8); - // expected-error@+2 {{'svsubhnb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnb' 
needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnb,_s16,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svsubhnb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnb,_n_s16,,)(svundef_s16(), i16); - // expected-error@+2 {{'svqdmulh_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmulh_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmulh,_s16,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqdmulh_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmulh_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmulh,_n_s16,,)(svundef_s16(), i16); - // expected-error@+2 {{'svqdmulh_lane_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmulh_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmulh_lane_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmulh_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmulh_lane,_s16,,)(svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svqshrunt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshrunt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshrunt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshrunt' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svqshrunt,_n_s16,,)(svundef_u8(), svundef_s16(), 1); - // expected-error@+2 {{'svrsubhnt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnt,_s16,,)(svundef_s8(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svrsubhnt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnt,_n_s16,,)(svundef_s8(), svundef_s16(), i16); - // expected-error@+2 {{'svnbsl_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svnbsl_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_n_s16,,)(svundef_s16(), svundef_s16(), i16); - // expected-error@+2 {{'svqdmlslb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslb,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqdmlslb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 
{{'svqdmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslb,_n_s16,,)(svundef_s16(), svundef_s8(), i8); - // expected-error@+2 {{'svsubhnt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnt,_s16,,)(svundef_s8(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svsubhnt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnt,_n_s16,,)(svundef_s8(), svundef_s16(), i16); - // expected-error@+2 {{'svqabs_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqabs_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqabs_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqabs_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqabs,_s16,_z,)(pg, svundef_s16()); - // expected-error@+2 {{'svqabs_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqabs_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqabs_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqabs_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqabs,_s16,_m,)(svundef_s16(), pg, svundef_s16()); - // expected-error@+2 {{'svqabs_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqabs_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqabs_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqabs_x' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svqabs,_s16,_x,)(pg, svundef_s16()); - // expected-error@+2 {{'svaddlbt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlbt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlbt,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svaddlbt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlbt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlbt,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svtbl2_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbl2_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbl2,_s16,,)(svundef2_s16(), svundef_u16()); - // expected-error@+2 {{'svshrnt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svshrnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svshrnt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svshrnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svshrnt,_n_s16,,)(svundef_s8(), svundef_s16(), 1); - // expected-error@+2 {{'svhsubr_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_s16,_z,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svhsubr_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_s16_m' 
needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svhsubr_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_s16,_x,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svhsubr_n_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_s16,_z,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svhsubr_n_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_s16,_m,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svhsubr_n_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_s16,_x,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'sveortb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16()); - // 
expected-error@+2 {{'sveortb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_n_s16,,)(svundef_s16(), svundef_s16(), i16); - // expected-error@+2 {{'svqxtnb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtnb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtnb,_s16,,)(svundef_s16()); - // expected-error@+2 {{'svmlalt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svmlalt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt,_n_s16,,)(svundef_s16(), svundef_s8(), i8); - // expected-error@+2 {{'svshrnb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svshrnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svshrnb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svshrnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svshrnb,_n_s16,,)(svundef_s16(), 1); - // expected-error@+2 {{'svaddhnt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 
{{'svaddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnt,_s16,,)(svundef_s8(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svaddhnt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnt,_n_s16,,)(svundef_s8(), svundef_s16(), i16); - // expected-error@+2 {{'svmls_lane_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmls_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svmls_lane_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmls_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmls_lane,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svqdmlalt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqdmlalt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalt,_n_s16,,)(svundef_s16(), svundef_s8(), i8); - // expected-error@+2 {{'svbcax_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16()); - // expected-error@+2 
{{'svbcax_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_n_s16,,)(svundef_s16(), svundef_s16(), i16); - // expected-error@+2 {{'svqxtnt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtnt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtnt,_s16,,)(svundef_s8(), svundef_s16()); - // expected-error@+2 {{'svqdmlalb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalb,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqdmlalb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalb,_n_s16,,)(svundef_s16(), svundef_s8(), i8); - // expected-error@+2 {{'svqrshl_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_s16,_z,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqrshl_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_s16_m' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqrshl_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_s16,_x,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqrshl_n_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_s16,_z,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svqrshl_n_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_s16,_m,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svqrshl_n_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_s16,_x,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svsublbt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublbt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublbt,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svsublbt_n_s16' needs 
target feature sve2|sme}} - // overload-error@+1 {{'svsublbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublbt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublbt,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svqshrnt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshrnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshrnt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshrnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshrnt,_n_s16,,)(svundef_s8(), svundef_s16(), 1); - // expected-error@+2 {{'svqdmullt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullt,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqdmullt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullt,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svsublt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublt,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svsublt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublt' needs target feature 
(sve,sve2)|sme}} SVE_ACLE_FUNC(svsublt,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svqdmlslbt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslbt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslbt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqdmlslbt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslbt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslbt,_n_s16,,)(svundef_s16(), svundef_s8(), i8); - // expected-error@+2 {{'svadalp_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_s16,_z,)(pg, svundef_s16(), svundef_s8()); - // expected-error@+2 {{'svadalp_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_s16,_m,)(pg, svundef_s16(), svundef_s8()); - // expected-error@+2 {{'svadalp_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_s16,_x,)(pg, svundef_s16(), svundef_s8()); - // expected-error@+2 {{'svmul_lane_s16' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svmul_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svmul_lane_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmul_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmul_lane,_s16,,)(svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svsubwt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwt,_s16,,)(svundef_s16(), svundef_s8()); - // expected-error@+2 {{'svsubwt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwt,_n_s16,,)(svundef_s16(), i8); - // expected-error@+2 {{'svqsubr_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_s16,_z,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqsubr_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqsubr_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature 
(sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_s16,_x,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqsubr_n_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_s16,_z,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svqsubr_n_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_s16,_m,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svqsubr_n_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_s16,_x,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svqrshrnt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshrnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshrnt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshrnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshrnt,_n_s16,,)(svundef_s8(), svundef_s16(), 1); - // expected-error@+2 {{'svaddp_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svaddp_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_x' needs 
target feature sve2|sme}} + // expected-error@+2 {{'svaddp_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_s16,_x,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqadd_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqadd_n_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_s16,_m,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svqadd_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_s16,_z,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqadd_n_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_s16,_z,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svqadd_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_s16,_x,)(pg, 
svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqadd_n_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_s16,_x,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svabdlb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlb,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svabdlb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlb,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svtbx_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbx' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbx_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbx' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbx,_s16,,)(svundef_s16(), svundef_s16(), svundef_u16()); - // expected-error@+2 {{'svabdlt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlt,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svabdlt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlt_n_s16' needs target feature (sve,sve2)|sme}} + // 
overload-error@+1 {{'svabdlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlt,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svqrshrnb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshrnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshrnb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshrnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshrnb,_n_s16,,)(svundef_s16(), 1); - // expected-error@+2 {{'svminp_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svminp_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_s16,_x,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqsub_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_s16,_z,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqsub_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqsub_s16_x' needs target feature sve2|sme}} - // overload-error@+1 
{{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_s16,_x,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqsub_n_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_s16,_z,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svqsub_n_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_s16,_m,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svqsub_n_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_s16,_x,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svrsubhnb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnb,_s16,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svrsubhnb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnb' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svrsubhnb,_n_s16,,)(svundef_s16(), i16); - // expected-error@+2 {{'svaddhnb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnb,_s16,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svaddhnb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnb,_n_s16,,)(svundef_s16(), i16); - // expected-error@+2 {{'svabalt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svabalt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalt,_n_s16,,)(svundef_s16(), svundef_s8(), i8); - // expected-error@+2 {{'svqshrnb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshrnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshrnb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshrnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshrnb,_n_s16,,)(svundef_s16(), 1); - // expected-error@+2 {{'sveor3_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 
{{'sveor3_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'sveor3_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_n_s16,,)(svundef_s16(), svundef_s16(), i16); - // expected-error@+2 {{'svhadd_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svhadd_n_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_s16,_m,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svhadd_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_s16,_z,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svhadd_n_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_s16,_z,)(pg, svundef_s16(), i16); - // 
expected-error@+2 {{'svhadd_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_s16,_x,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svhadd_n_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_s16,_x,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svqshrunb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshrunb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshrunb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshrunb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshrunb,_n_s16,,)(svundef_s16(), 1); - // expected-error@+2 {{'svmovlb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmovlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmovlb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmovlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmovlb,_s16,,)(svundef_s8()); - // expected-error@+2 {{'svqrdmlsh_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlsh_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlsh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlsh,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqrdmlsh_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlsh_n_s16' needs target feature (sve,sve2)|sme}} + // 
overload-error@+1 {{'svqrdmlsh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlsh,_n_s16,,)(svundef_s16(), svundef_s16(), i16); - // expected-error@+2 {{'svqrdmlsh_lane_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlsh_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlsh_lane_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlsh_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlsh_lane,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svqdmlslt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqdmlslt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslt,_n_s16,,)(svundef_s16(), svundef_s8(), i8); - // expected-error@+2 {{'svmaxp_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svmaxp_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_s16,_x,)(pg, svundef_s16(), svundef_s16()); - 
// expected-error@+2 {{'svmullt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svmullt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svmatch_s16' needs target feature sve2}} - // overload-error@+1 {{'svmatch' needs target feature sve2}} + // expected-error@+2 {{'svmatch_s16' needs target feature sve,sve2}} + // overload-error@+1 {{'svmatch' needs target feature sve,sve2}} SVE_ACLE_FUNC(svmatch,_s16,,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqxtunb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtunb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtunb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtunb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtunb,_s16,,)(svundef_s16()); - // expected-error@+2 {{'svmla_lane_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmla_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svmla_lane_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmla_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmla_lane,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svrshrnb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshrnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshrnb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshrnb' 
needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshrnb,_n_s16,,)(svundef_s16(), 1); - // expected-error@+2 {{'svwhilerw_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilerw_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilerw,_s16,,)(const_i16_ptr, const_i16_ptr); - // expected-error@+2 {{'svshllb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svshllb' needs target feature sve2|sme}} + // expected-error@+2 {{'svshllb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svshllb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svshllb,_n_s16,,)(svundef_s8(), 2); - // expected-error@+2 {{'svrhadd_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svrhadd_n_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_s16,_m,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svrhadd_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_s16,_z,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svrhadd_n_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs 
target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_s16,_z,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svrhadd_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_s16,_x,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svrhadd_n_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_s16,_x,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svraddhnb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnb,_s16,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svraddhnb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnb,_n_s16,,)(svundef_s16(), i16); - // expected-error@+2 {{'svwhilewr_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilewr_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svwhilewr,_s16,,)(const_i16_ptr, const_i16_ptr); - // expected-error@+2 {{'svmlalb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svmlalb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb,_n_s16,,)(svundef_s16(), svundef_s8(), i8); - // expected-error@+2 {{'svsubwb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwb,_s16,,)(svundef_s16(), svundef_s8()); - // expected-error@+2 {{'svsubwb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwb,_n_s16,,)(svundef_s16(), i8); - // expected-error@+2 {{'svnmatch_s16' needs target feature sve2}} - // overload-error@+1 {{'svnmatch' needs target feature sve2}} + // expected-error@+2 {{'svnmatch_s16' needs target feature sve,sve2}} + // overload-error@+1 {{'svnmatch' needs target feature sve,sve2}} SVE_ACLE_FUNC(svnmatch,_s16,,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svaba_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_s16' needs target 
feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svaba_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_n_s16,,)(svundef_s16(), svundef_s16(), i16); - // expected-error@+2 {{'svraddhnt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnt,_s16,,)(svundef_s8(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svraddhnt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnt,_n_s16,,)(svundef_s8(), svundef_s16(), i16); - // expected-error@+2 {{'svuqadd_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_s16,_m,)(pg, svundef_s16(), svundef_u16()); - // expected-error@+2 {{'svuqadd_n_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_n_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_n_s16,_m,)(pg, svundef_s16(), u16); - // 
expected-error@+2 {{'svuqadd_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_s16,_z,)(pg, svundef_s16(), svundef_u16()); - // expected-error@+2 {{'svuqadd_n_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_n_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_n_s16,_z,)(pg, svundef_s16(), u16); - // expected-error@+2 {{'svuqadd_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_s16,_x,)(pg, svundef_s16(), svundef_u16()); - // expected-error@+2 {{'svuqadd_n_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_n_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_n_s16,_x,)(pg, svundef_s16(), u16); - // expected-error@+2 {{'sveorbt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'sveorbt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_n_s16' needs target 
feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_n_s16,,)(svundef_s16(), svundef_s16(), i16); - // expected-error@+2 {{'svbsl_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svbsl_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_n_s16,,)(svundef_s16(), svundef_s16(), i16); - // expected-error@+2 {{'svshllt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svshllt' needs target feature sve2|sme}} + // expected-error@+2 {{'svshllt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svshllt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svshllt,_n_s16,,)(svundef_s8(), 2); - // expected-error@+2 {{'svsubltb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubltb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubltb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubltb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubltb,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svsubltb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubltb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubltb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubltb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubltb,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svhsub_s16_z' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_s16,_z,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svhsub_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svhsub_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_s16,_x,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svhsub_n_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_s16,_z,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svhsub_n_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_s16,_m,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svhsub_n_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature 
(sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_s16,_x,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svaddlb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlb,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svaddlb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlb,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svqrdmlah_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlah_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlah' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlah,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqrdmlah_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlah_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlah' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlah,_n_s16,,)(svundef_s16(), svundef_s16(), i16); - // expected-error@+2 {{'svqrdmlah_lane_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlah_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlah_lane_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlah_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlah_lane,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svqdmullb_s16' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svqdmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullb,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svqdmullb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullb,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svbsl2n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svbsl2n_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_n_s16,,)(svundef_s16(), svundef_s16(), i16); - // expected-error@+2 {{'svaddlt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlt,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svaddlt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlt' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svaddlt,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svqxtunt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtunt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtunt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtunt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtunt,_s16,,)(svundef_u8(), svundef_s16()); - // expected-error@+2 {{'svqrshrunt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshrunt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshrunt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshrunt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshrunt,_n_s16,,)(svundef_u8(), svundef_s16(), 1); - // expected-error@+2 {{'svabalb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalb,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svabalb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalb,_n_s16,,)(svundef_s16(), svundef_s8(), i8); - // expected-error@+2 {{'svsublb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublb,_s16,,)(svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svsublb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublb' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svsublb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublb,_n_s16,,)(svundef_s8(), i8); - // expected-error@+2 {{'svbsl1n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svbsl1n_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_n_s16,,)(svundef_s16(), svundef_s16(), i16); - // expected-error@+2 {{'svrshl_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_s16,_z,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svrshl_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svrshl_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_s16,_x,)(pg, svundef_s16(), svundef_s16()); - 
// expected-error@+2 {{'svrshl_n_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_s16,_z,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svrshl_n_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_s16,_m,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svrshl_n_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_s16,_x,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svaddwt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwt,_s16,,)(svundef_s16(), svundef_s8()); - // expected-error@+2 {{'svaddwt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwt,_n_s16,,)(svundef_s16(), i8); - // expected-error@+2 {{'svmlslb_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 
{{'svmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svmlslb_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb,_n_s16,,)(svundef_s16(), svundef_s8(), i8); - // expected-error@+2 {{'svmlslt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8()); - // expected-error@+2 {{'svmlslt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslt,_n_s16,,)(svundef_s16(), svundef_s8(), i8); - // expected-error@+2 {{'svqneg_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqneg_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqneg_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqneg_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqneg,_s16,_z,)(pg, svundef_s16()); - // expected-error@+2 {{'svqneg_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqneg_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqneg_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqneg_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqneg,_s16,_m,)(svundef_s16(), pg, svundef_s16()); - // expected-error@+2 {{'svqneg_s16_x' needs target feature sve2|sme}} - // overload-error@+1 
{{'svqneg_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqneg_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqneg_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqneg,_s16,_x,)(pg, svundef_s16()); - // expected-error@+2 {{'svmovlt_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmovlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmovlt_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmovlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmovlt,_s16,,)(svundef_s8()); - // expected-error@+2 {{'svrshrnt_n_s16' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshrnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshrnt_n_s16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshrnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshrnt,_n_s16,,)(svundef_s8(), svundef_s16(), 1); - // expected-error@+2 {{'svqshl_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_s16,_z,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqshl_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_s16,_m,)(pg, svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqshl_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_s16,_x,)(pg, 
svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqshl_n_s16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_s16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_s16,_z,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svqshl_n_s16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_s16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_s16,_m,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svqshl_n_s16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_s16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_s16,_x,)(pg, svundef_s16(), i16); - // expected-error@+2 {{'svmullb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svmullb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svmullb_lane_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullb_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullb_lane_s32' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svmullb_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb_lane,_s32,,)(svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svqdmlalbt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalbt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalbt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqdmlalbt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalbt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalbt,_n_s32,,)(svundef_s32(), svundef_s16(), i16); - // expected-error@+2 {{'svqrdmulh_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmulh_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmulh,_s32,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqrdmulh_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmulh_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmulh,_n_s32,,)(svundef_s32(), i32); - // expected-error@+2 {{'svaddwb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwb,_s32,,)(svundef_s32(), svundef_s16()); - // 
expected-error@+2 {{'svaddwb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwb,_n_s32,,)(svundef_s32(), i16); - // expected-error@+2 {{'svsubhnb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnb,_s32,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svsubhnb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnb,_n_s32,,)(svundef_s32(), i32); - // expected-error@+2 {{'svqdmulh_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmulh_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmulh,_s32,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqdmulh_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmulh_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmulh,_n_s32,,)(svundef_s32(), i32); - // expected-error@+2 {{'svrsubhnt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 
{{'svrsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnt,_s32,,)(svundef_s16(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svrsubhnt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnt,_n_s32,,)(svundef_s16(), svundef_s32(), i32); - // expected-error@+2 {{'svnbsl_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svnbsl_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_n_s32,,)(svundef_s32(), svundef_s32(), i32); - // expected-error@+2 {{'svqdmlslb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslb,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqdmlslb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslb,_n_s32,,)(svundef_s32(), svundef_s16(), i16); - // expected-error@+2 
{{'svqdmlslb_lane_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslb_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslb_lane_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslb_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslb_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svsubhnt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnt,_s32,,)(svundef_s16(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svsubhnt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnt,_n_s32,,)(svundef_s16(), svundef_s32(), i32); - // expected-error@+2 {{'svqabs_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqabs_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqabs_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqabs_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqabs,_s32,_z,)(pg, svundef_s32()); - // expected-error@+2 {{'svqabs_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqabs_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqabs_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqabs_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqabs,_s32,_m,)(svundef_s32(), pg, svundef_s32()); - // expected-error@+2 {{'svqabs_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqabs_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqabs_s32_x' needs 
target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqabs_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqabs,_s32,_x,)(pg, svundef_s32()); - // expected-error@+2 {{'svwhilegt_b8_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b8' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b8_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b8' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b8,_s32,,)(i32, i32); - // expected-error@+2 {{'svwhilegt_b16_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b16' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b16_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b16' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b16,_s32,,)(i32, i32); - // expected-error@+2 {{'svwhilegt_b32_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b32' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b32_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b32' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b32,_s32,,)(i32, i32); - // expected-error@+2 {{'svwhilegt_b64_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b64' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b64_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b64' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b64,_s32,,)(i32, i32); - // expected-error@+2 {{'svaddlbt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlbt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlbt,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svaddlbt_n_s32' needs target 
feature sve2|sme}} - // overload-error@+1 {{'svaddlbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlbt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlbt,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svtbl2_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbl2_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbl2,_s32,,)(svundef2_s32(), svundef_u32()); - // expected-error@+2 {{'svhsubr_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_s32,_z,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svhsubr_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svhsubr_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svhsubr_n_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs 
target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_s32,_z,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svhsubr_n_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_s32,_m,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svhsubr_n_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_s32,_x,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svhistcnt_s32_z' needs target feature sve2}} - // overload-error@+1 {{'svhistcnt_z' needs target feature sve2}} + // expected-error@+2 {{'svhistcnt_s32_z' needs target feature sve,sve2}} + // overload-error@+1 {{'svhistcnt_z' needs target feature sve,sve2}} SVE_ACLE_FUNC(svhistcnt,_s32,_z,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'sveortb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'sveortb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_n_s32,,)(svundef_s32(), svundef_s32(), i32); - // expected-error@+2 {{'svqxtnb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtnb' needs 
target feature sve2|sme}} + // expected-error@+2 {{'svqxtnb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtnb,_s32,,)(svundef_s32()); - // expected-error@+2 {{'svmlalt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svmlalt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt,_n_s32,,)(svundef_s32(), svundef_s16(), i16); - // expected-error@+2 {{'svmlalt_lane_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_lane_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svaddhnt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnt,_s32,,)(svundef_s16(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svaddhnt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnt' needs target feature 
(sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnt,_n_s32,,)(svundef_s16(), svundef_s32(), i32); - // expected-error@+2 {{'svldnt1uh_gather_u32base_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u32base_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _s32, )(pg, svundef_u32()); - // expected-error@+2 {{'svldnt1uh_gather_u32offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_offset_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u32offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_offset_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather_, u32, offset_s32, )(pg, const_u16_ptr, svundef_u32()); - // expected-error@+2 {{'svldnt1uh_gather_u32base_offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_offset_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u32base_offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_offset_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _offset_s32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svldnt1uh_gather_u32base_index_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_index_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u32base_index_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_index_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _index_s32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svqdmlalt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalt_s32' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqdmlalt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalt,_n_s32,,)(svundef_s32(), svundef_s16(), i16); - // expected-error@+2 {{'svqdmlalt_lane_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalt_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalt_lane_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalt_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalt_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svbcax_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svbcax_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_n_s32,,)(svundef_s32(), svundef_s32(), i32); - // expected-error@+2 {{'svqxtnt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtnt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtnt,_s32,,)(svundef_s16(), 
svundef_s32()); - // expected-error@+2 {{'svqdmlalb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalb,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqdmlalb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalb,_n_s32,,)(svundef_s32(), svundef_s16(), i16); - // expected-error@+2 {{'svqdmlalb_lane_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalb_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalb_lane_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalb_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalb_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svqrshl_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_s32,_z,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqrshl_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqrshl_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' 
needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqrshl_n_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_s32,_z,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svqrshl_n_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_s32,_m,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svqrshl_n_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_s32,_x,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svcdot_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svcdot' needs target feature sve2|sme}} + // expected-error@+2 {{'svcdot_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcdot' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcdot,_s32,,)(svundef_s32(), svundef_s8(), svundef_s8(), 90); - // expected-error@+2 {{'svsublbt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublbt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublbt' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svsublbt,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svsublbt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublbt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublbt,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svqdmullt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullt,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqdmullt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullt,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svqdmullt_lane_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullt_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullt_lane_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullt_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullt_lane,_s32,,)(svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svsublt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublt,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svsublt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublt' needs target feature 
sve2|sme}} + // expected-error@+2 {{'svsublt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublt,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svqdmlslbt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslbt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslbt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqdmlslbt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslbt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslbt,_n_s32,,)(svundef_s32(), svundef_s16(), i16); - // expected-error@+2 {{'svadalp_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_s32,_z,)(pg, svundef_s32(), svundef_s16()); - // expected-error@+2 {{'svadalp_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_s32,_m,)(pg, svundef_s32(), svundef_s16()); - // expected-error@+2 {{'svadalp_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_x' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svadalp,_s32,_x,)(pg, svundef_s32(), svundef_s16()); - // expected-error@+2 {{'svwhilege_b8_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b8' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b8_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b8' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b8,_s32,,)(i32, i32); - // expected-error@+2 {{'svwhilege_b16_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b16' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b16_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b16' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b16,_s32,,)(i32, i32); - // expected-error@+2 {{'svwhilege_b32_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b32' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b32_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b32' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b32,_s32,,)(i32, i32); - // expected-error@+2 {{'svwhilege_b64_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b64' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b64_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b64' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b64,_s32,,)(i32, i32); - // expected-error@+2 {{'svsubwt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwt,_s32,,)(svundef_s32(), svundef_s16()); - // expected-error@+2 {{'svsubwt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svsubwt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwt,_n_s32,,)(svundef_s32(), i16); - // expected-error@+2 {{'svqsubr_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_s32,_z,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqsubr_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqsubr_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqsubr_n_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_s32,_z,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svqsubr_n_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_s32,_m,)(pg, 
svundef_s32(), i32); - // expected-error@+2 {{'svqsubr_n_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_s32,_x,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svaddp_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svaddp_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqadd_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqadd_n_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_s32,_m,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svqadd_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_s32_z' needs 
target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_s32,_z,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqadd_n_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_s32,_z,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svqadd_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqadd_n_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_s32,_x,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svabdlb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlb,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svabdlb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlb,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svtbx_s32' needs target feature 
sve2|sme}} - // overload-error@+1 {{'svtbx' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbx_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbx' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbx,_s32,,)(svundef_s32(), svundef_s32(), svundef_u32()); - // expected-error@+2 {{'svabdlt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlt,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svabdlt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlt,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svminp_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svminp_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqsub_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svqsub,_s32,_z,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqsub_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqsub_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqsub_n_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_s32,_z,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svqsub_n_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_s32,_m,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svqsub_n_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_s32,_x,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svrsubhnb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svrsubhnb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnb,_s32,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svrsubhnb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnb,_n_s32,,)(svundef_s32(), i32); - // expected-error@+2 {{'svaddhnb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnb,_s32,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svaddhnb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnb,_n_s32,,)(svundef_s32(), i32); - // expected-error@+2 {{'svabalt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svabalt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalt,_n_s32,,)(svundef_s32(), svundef_s16(), i16); - // 
expected-error@+2 {{'sveor3_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'sveor3_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_n_s32,,)(svundef_s32(), svundef_s32(), i32); - // expected-error@+2 {{'svhadd_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svhadd_n_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_s32,_m,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svhadd_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_s32,_z,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svhadd_n_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_s32_z' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_s32,_z,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svhadd_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svhadd_n_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_s32,_x,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svmovlb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmovlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmovlb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmovlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmovlb,_s32,,)(svundef_s16()); - // expected-error@+2 {{'svstnt1_scatter_u32base_s32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u32base_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u32base, , _s32)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svstnt1_scatter_u32offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u32offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, u32, offset, _s32)(pg, i32_ptr, 
svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svstnt1_scatter_u32base_offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u32base_offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _offset, _s32)(pg, svundef_u32(), i64, svundef_s32()); - // expected-error@+2 {{'svstnt1_scatter_u32base_index_s32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u32base_index_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _index, _s32)(pg, svundef_u32(), i64, svundef_s32()); - // expected-error@+2 {{'svqrdmlsh_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlsh_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlsh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlsh,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqrdmlsh_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlsh_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlsh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlsh,_n_s32,,)(svundef_s32(), svundef_s32(), i32); - // expected-error@+2 {{'svqdmlslt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslt,_s32,,)(svundef_s32(), 
svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqdmlslt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslt,_n_s32,,)(svundef_s32(), svundef_s16(), i16); - // expected-error@+2 {{'svqdmlslt_lane_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslt_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslt_lane_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslt_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslt_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svmaxp_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svmaxp_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svmullt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svmullt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt' needs target feature 
sve2|sme}} + // expected-error@+2 {{'svmullt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svmullt_lane_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullt_lane_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt_lane,_s32,,)(svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svldnt1sh_gather_u32base_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u32base_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _s32, )(pg, svundef_u32()); - // expected-error@+2 {{'svldnt1sh_gather_u32offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_offset_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u32offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_offset_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather_, u32, offset_s32, )(pg, const_i16_ptr, svundef_u32()); - // expected-error@+2 {{'svldnt1sh_gather_u32base_offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_offset_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u32base_offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_offset_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _offset_s32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svldnt1sh_gather_u32base_index_s32' needs target feature sve2}} - // overload-error@+1 
{{'svldnt1sh_gather_index_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u32base_index_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_index_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _index_s32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svqxtunb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtunb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtunb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtunb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtunb,_s32,,)(svundef_s32()); - // expected-error@+2 {{'svwhilerw_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilerw_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilerw,_s32,,)(const_i32_ptr, const_i32_ptr); - // expected-error@+2 {{'svrhadd_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svrhadd_n_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_s32,_m,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svrhadd_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' 
needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_s32,_z,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svrhadd_n_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_s32,_z,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svrhadd_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svrhadd_n_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_s32,_x,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svraddhnb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnb,_s32,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svraddhnb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnb,_n_s32,,)(svundef_s32(), i32); - // expected-error@+2 {{'svwhilewr_s32' needs target feature sve2|sme}} - // overload-error@+1 
{{'svwhilewr' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilewr_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilewr,_s32,,)(const_i32_ptr, const_i32_ptr); - // expected-error@+2 {{'svmlalb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svmlalb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb,_n_s32,,)(svundef_s32(), svundef_s16(), i16); - // expected-error@+2 {{'svmlalb_lane_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalb_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_lane_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svldnt1sb_gather_u32base_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sb_gather_u32base_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather, _u32base, _s32, )(pg, svundef_u32()); - // expected-error@+2 {{'svldnt1sb_gather_u32offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_offset_s32' needs target feature sve2}} + // expected-error@+2 
{{'svldnt1sb_gather_u32offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_offset_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather_, u32, offset_s32, )(pg, const_i8_ptr, svundef_u32()); - // expected-error@+2 {{'svldnt1sb_gather_u32base_offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_offset_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sb_gather_u32base_offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_offset_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather, _u32base, _offset_s32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svsubwb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwb,_s32,,)(svundef_s32(), svundef_s16()); - // expected-error@+2 {{'svsubwb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwb,_n_s32,,)(svundef_s32(), i16); - // expected-error@+2 {{'svldnt1ub_gather_u32base_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1ub_gather_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_u32base_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather, _u32base, _s32, )(pg, svundef_u32()); - // expected-error@+2 {{'svldnt1ub_gather_u32offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1ub_gather_offset_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_u32offset_s32' 
needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_offset_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather_, u32, offset_s32, )(pg, const_u8_ptr, svundef_u32()); - // expected-error@+2 {{'svldnt1ub_gather_u32base_offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1ub_gather_offset_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_u32base_offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_offset_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather, _u32base, _offset_s32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svaba_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svaba_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_n_s32,,)(svundef_s32(), svundef_s32(), i32); - // expected-error@+2 {{'svraddhnt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnt,_s32,,)(svundef_s16(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svraddhnt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnt' needs target 
feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnt,_n_s32,,)(svundef_s16(), svundef_s32(), i32); - // expected-error@+2 {{'svuqadd_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_s32,_m,)(pg, svundef_s32(), svundef_u32()); - // expected-error@+2 {{'svuqadd_n_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_n_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_n_s32,_m,)(pg, svundef_s32(), u32); - // expected-error@+2 {{'svuqadd_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_s32,_z,)(pg, svundef_s32(), svundef_u32()); - // expected-error@+2 {{'svuqadd_n_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_n_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_n_s32,_z,)(pg, svundef_s32(), u32); - // expected-error@+2 {{'svuqadd_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_s32,_x,)(pg, svundef_s32(), svundef_u32()); - // expected-error@+2 {{'svuqadd_n_s32_x' needs target feature sve2|sme}} - // overload-error@+1 
{{'svuqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_n_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_n_s32,_x,)(pg, svundef_s32(), u32); - // expected-error@+2 {{'sveorbt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'sveorbt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_n_s32,,)(svundef_s32(), svundef_s32(), i32); - // expected-error@+2 {{'svbsl_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svbsl_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_n_s32,,)(svundef_s32(), svundef_s32(), i32); - // expected-error@+2 {{'svsubltb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubltb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubltb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubltb' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svsubltb,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svsubltb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubltb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubltb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubltb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubltb,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svhsub_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_s32,_z,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svhsub_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svhsub_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svhsub_n_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_s32,_z,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svhsub_n_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svhsub_n_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_s32,_m,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svhsub_n_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_s32,_x,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svldnt1_gather_u32base_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u32base_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u32base, _s32, )(pg, svundef_u32()); - // expected-error@+2 {{'svldnt1_gather_u32offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u32offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, u32, offset, _s32)(pg, const_i32_ptr, svundef_u32()); - // expected-error@+2 {{'svldnt1_gather_u32base_offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset_s32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u32base_offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u32base, _offset_s32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svldnt1_gather_u32base_index_s32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_index_s32' needs target feature sve2}} + // expected-error@+2 
{{'svldnt1_gather_u32base_index_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_index_s32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u32base, _index_s32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svaddlb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlb,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svaddlb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlb,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svqrdmlah_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlah_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlah' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlah,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqrdmlah_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlah_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlah' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlah,_n_s32,,)(svundef_s32(), svundef_s32(), i32); - // expected-error@+2 {{'svqdmullb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullb' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svqdmullb,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svqdmullb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullb,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svqdmullb_lane_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullb_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullb_lane_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullb_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullb_lane,_s32,,)(svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svstnt1h_scatter_u32base_s32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u32base_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter, _u32base, , _s32)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svstnt1h_scatter_u32offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u32offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter_, u32, offset, _s32)(pg, i16_ptr, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svstnt1h_scatter_u32base_offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u32base_offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve,sve2}} 
SVE_ACLE_FUNC(svstnt1h_scatter, _u32base, _offset, _s32)(pg, svundef_u32(), i64, svundef_s32()); - // expected-error@+2 {{'svstnt1h_scatter_u32base_index_s32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u32base_index_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter, _u32base, _index, _s32)(pg, svundef_u32(), i64, svundef_s32()); - // expected-error@+2 {{'svstnt1b_scatter_u32base_s32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_u32base_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter, _u32base, , _s32)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svstnt1b_scatter_u32offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_u32offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter_, u32, offset, _s32)(pg, i8_ptr, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svstnt1b_scatter_u32base_offset_s32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_u32base_offset_s32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter, _u32base, _offset, _s32)(pg, svundef_u32(), i64, svundef_s32()); - // expected-error@+2 {{'svbsl2n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_s32' 
needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svbsl2n_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_n_s32,,)(svundef_s32(), svundef_s32(), i32); - // expected-error@+2 {{'svaddlt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlt,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svaddlt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlt,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svqxtunt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtunt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtunt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtunt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtunt,_s32,,)(svundef_u16(), svundef_s32()); - // expected-error@+2 {{'svabalb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalb,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svabalb_n_s32' 
needs target feature sve2|sme}} - // overload-error@+1 {{'svabalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalb,_n_s32,,)(svundef_s32(), svundef_s16(), i16); - // expected-error@+2 {{'svsublb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublb,_s32,,)(svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svsublb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublb,_n_s32,,)(svundef_s16(), i16); - // expected-error@+2 {{'svbsl1n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svbsl1n_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_n_s32,,)(svundef_s32(), svundef_s32(), i32); - // expected-error@+2 {{'svrshl_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs 
target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_s32,_z,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svrshl_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svrshl_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svrshl_n_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_s32,_z,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svrshl_n_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_s32,_m,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svrshl_n_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_s32,_x,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svaddwt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwt' needs target 
feature sve2|sme}} + // expected-error@+2 {{'svaddwt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwt,_s32,,)(svundef_s32(), svundef_s16()); - // expected-error@+2 {{'svaddwt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwt,_n_s32,,)(svundef_s32(), i16); - // expected-error@+2 {{'svmlslb_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svmlslb_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb,_n_s32,,)(svundef_s32(), svundef_s16(), i16); - // expected-error@+2 {{'svmlslb_lane_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_lane_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslb_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svmlslt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslt' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svmlslt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16()); - // expected-error@+2 {{'svmlslt_n_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_n_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslt,_n_s32,,)(svundef_s32(), svundef_s16(), i16); - // expected-error@+2 {{'svmlslt_lane_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt_lane' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_lane_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslt_lane' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslt_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1); - // expected-error@+2 {{'svqneg_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqneg_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqneg_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqneg_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqneg,_s32,_z,)(pg, svundef_s32()); - // expected-error@+2 {{'svqneg_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqneg_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqneg_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqneg_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqneg,_s32,_m,)(svundef_s32(), pg, svundef_s32()); - // expected-error@+2 {{'svqneg_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqneg_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqneg_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqneg_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqneg,_s32,_x,)(pg, svundef_s32()); - // expected-error@+2 {{'svmovlt_s32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmovlt' needs target feature 
sve2|sme}} + // expected-error@+2 {{'svmovlt_s32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmovlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmovlt,_s32,,)(svundef_s16()); - // expected-error@+2 {{'svqshl_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_s32,_z,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqshl_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_s32,_m,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqshl_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_s32,_x,)(pg, svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqshl_n_s32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_s32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_s32,_z,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svqshl_n_s32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_s32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_s32,_m,)(pg, svundef_s32(), i32); - // 
expected-error@+2 {{'svqshl_n_s32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_s32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_s32,_x,)(pg, svundef_s32(), i32); - // expected-error@+2 {{'svmullb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svmullb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 {{'svqdmlalbt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalbt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalbt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqdmlalbt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalbt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalbt,_n_s64,,)(svundef_s64(), svundef_s32(), i32); - // expected-error@+2 {{'svqrdmulh_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmulh_s64' needs target 
feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmulh,_s64,,)(svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqrdmulh_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmulh_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmulh,_n_s64,,)(svundef_s64(), i64); - // expected-error@+2 {{'svaddwb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwb,_s64,,)(svundef_s64(), svundef_s32()); - // expected-error@+2 {{'svaddwb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwb,_n_s64,,)(svundef_s64(), i32); - // expected-error@+2 {{'svsubhnb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnb,_s64,,)(svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svsubhnb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnb,_n_s64,,)(svundef_s64(), i64); - // expected-error@+2 {{'svqdmulh_s64' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmulh_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmulh,_s64,,)(svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqdmulh_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmulh_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmulh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmulh,_n_s64,,)(svundef_s64(), i64); - // expected-error@+2 {{'svrsubhnt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnt,_s64,,)(svundef_s32(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svrsubhnt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnt,_n_s64,,)(svundef_s32(), svundef_s64(), i64); - // expected-error@+2 {{'svnbsl_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svnbsl_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target 
feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_n_s64,,)(svundef_s64(), svundef_s64(), i64); - // expected-error@+2 {{'svqdmlslb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslb,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqdmlslb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslb,_n_s64,,)(svundef_s64(), svundef_s32(), i32); - // expected-error@+2 {{'svsubhnt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnt,_s64,,)(svundef_s32(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svsubhnt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnt,_n_s64,,)(svundef_s32(), svundef_s64(), i64); - // expected-error@+2 {{'svqabs_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqabs_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqabs_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqabs_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqabs,_s64,_z,)(pg, svundef_s64()); - // expected-error@+2 {{'svqabs_s64_m' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svqabs_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqabs_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqabs_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqabs,_s64,_m,)(svundef_s64(), pg, svundef_s64()); - // expected-error@+2 {{'svqabs_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqabs_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqabs_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqabs_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqabs,_s64,_x,)(pg, svundef_s64()); - // expected-error@+2 {{'svwhilegt_b8_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b8' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b8_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b8' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b8,_s64,,)(i64, i64); - // expected-error@+2 {{'svwhilegt_b16_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b16' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b16_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b16' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b16,_s64,,)(i64, i64); - // expected-error@+2 {{'svwhilegt_b32_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b32' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b32_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b32' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b32,_s64,,)(i64, i64); - // expected-error@+2 {{'svwhilegt_b64_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b64' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b64_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b64' needs target feature 
(sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b64,_s64,,)(i64, i64); - // expected-error@+2 {{'svaddlbt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlbt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlbt,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svaddlbt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlbt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlbt,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 {{'svtbl2_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbl2_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbl2,_s64,,)(svundef2_s64(), svundef_u64()); - // expected-error@+2 {{'svhsubr_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_s64,_z,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svhsubr_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svhsubr_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svhsubr_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svhsubr_n_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_s64,_z,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svhsubr_n_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_s64,_m,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svhsubr_n_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_s64,_x,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svhistcnt_s64_z' needs target feature sve2}} - // overload-error@+1 {{'svhistcnt_z' needs target feature sve2}} + // expected-error@+2 {{'svhistcnt_s64_z' needs target feature sve,sve2}} + // overload-error@+1 {{'svhistcnt_z' needs target feature sve,sve2}} SVE_ACLE_FUNC(svhistcnt,_s64,_z,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'sveortb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_s64,,)(svundef_s64(), svundef_s64(), 
svundef_s64()); - // expected-error@+2 {{'sveortb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_n_s64,,)(svundef_s64(), svundef_s64(), i64); - // expected-error@+2 {{'svqxtnb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtnb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtnb,_s64,,)(svundef_s64()); - // expected-error@+2 {{'svmlalt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svmlalt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt,_n_s64,,)(svundef_s64(), svundef_s32(), i32); - // expected-error@+2 {{'svaddhnt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnt,_s64,,)(svundef_s32(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svaddhnt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnt_n_s64' needs target 
feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnt,_n_s64,,)(svundef_s32(), svundef_s64(), i64); - // expected-error@+2 {{'svldnt1uh_gather_u64base_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u64base_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _s64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1uh_gather_s64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_s64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather_, s64, offset_s64, )(pg, const_u16_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1uh_gather_u64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather_, u64, offset_s64, )(pg, const_u16_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1uh_gather_u64base_offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u64base_offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _offset_s64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svldnt1uh_gather_s64index_s64' needs target feature sve2}} - // overload-error@+1 
{{'svldnt1uh_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_s64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather_, s64, index_s64, )(pg, const_u16_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1uh_gather_u64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather_, u64, index_s64, )(pg, const_u16_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1uh_gather_u64base_index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u64base_index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _index_s64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svqdmlalt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqdmlalt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalt,_n_s64,,)(svundef_s64(), svundef_s32(), i32); - // expected-error@+2 {{'svbcax_s64' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svbcax_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_n_s64,,)(svundef_s64(), svundef_s64(), i64); - // expected-error@+2 {{'svqxtnt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtnt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtnt,_s64,,)(svundef_s32(), svundef_s64()); - // expected-error@+2 {{'svqdmlalb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalb,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqdmlalb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlalb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlalb,_n_s64,,)(svundef_s64(), svundef_s32(), i32); - // expected-error@+2 {{'svqrshl_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs 
target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_s64,_z,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqrshl_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqrshl_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqrshl_n_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_s64,_z,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svqrshl_n_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_s64,_m,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svqrshl_n_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_s64,_x,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svsublbt_s64' needs target feature sve2|sme}} - // overload-error@+1 
{{'svsublbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublbt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublbt,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svsublbt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublbt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublbt,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 {{'svqdmullt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullt,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqdmullt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullt,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 {{'svsublt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublt,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svsublt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublt' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svsublt,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 {{'svqdmlslbt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslbt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslbt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqdmlslbt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslbt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslbt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslbt,_n_s64,,)(svundef_s64(), svundef_s32(), i32); - // expected-error@+2 {{'svadalp_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_s64,_z,)(pg, svundef_s64(), svundef_s32()); - // expected-error@+2 {{'svadalp_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_s64,_m,)(pg, svundef_s64(), svundef_s32()); - // expected-error@+2 {{'svadalp_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_s64,_x,)(pg, svundef_s64(), svundef_s32()); - // expected-error@+2 {{'svwhilege_b8_s64' needs target feature sve2|sme}} - // overload-error@+1 
{{'svwhilege_b8' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b8_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b8' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b8,_s64,,)(i64, i64); - // expected-error@+2 {{'svwhilege_b16_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b16' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b16_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b16' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b16,_s64,,)(i64, i64); - // expected-error@+2 {{'svwhilege_b32_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b32' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b32_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b32' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b32,_s64,,)(i64, i64); - // expected-error@+2 {{'svwhilege_b64_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b64' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b64_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b64' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b64,_s64,,)(i64, i64); - // expected-error@+2 {{'svsubwt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwt,_s64,,)(svundef_s64(), svundef_s32()); - // expected-error@+2 {{'svsubwt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwt' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svsubwt,_n_s64,,)(svundef_s64(), i32); - // expected-error@+2 {{'svqsubr_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_s64,_z,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqsubr_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqsubr_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqsubr_n_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_s64,_z,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svqsubr_n_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_s64,_m,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svqsubr_n_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature 
sve2|sme}} + // expected-error@+2 {{'svqsubr_n_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_s64,_x,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svaddp_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svaddp_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqadd_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqadd_n_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_s64,_m,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svqadd_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_s64,_z,)(pg, svundef_s64(), 
svundef_s64()); - // expected-error@+2 {{'svqadd_n_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_s64,_z,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svqadd_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqadd_n_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_s64,_x,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svabdlb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlb,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svabdlb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlb,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 {{'svtbx_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbx' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbx_s64' needs target feature (sve,sve2)|sme}} + // 
overload-error@+1 {{'svtbx' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbx,_s64,,)(svundef_s64(), svundef_s64(), svundef_u64()); - // expected-error@+2 {{'svabdlt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlt,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svabdlt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlt,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 {{'svminp_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svminp_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqsub_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_s64,_z,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqsub_s64_m' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqsub_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqsub_n_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_s64,_z,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svqsub_n_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_s64,_m,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svqsub_n_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_s64,_x,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svrsubhnb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnb' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svrsubhnb,_s64,,)(svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svrsubhnb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnb,_n_s64,,)(svundef_s64(), i64); - // expected-error@+2 {{'svaddhnb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnb,_s64,,)(svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svaddhnb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnb,_n_s64,,)(svundef_s64(), i64); - // expected-error@+2 {{'svabalt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svabalt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalt,_n_s64,,)(svundef_s64(), svundef_s32(), i32); - // expected-error@+2 {{'sveor3_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // 
expected-error@+2 {{'sveor3_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'sveor3_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_n_s64,,)(svundef_s64(), svundef_s64(), i64); - // expected-error@+2 {{'svhadd_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svhadd_n_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_s64,_m,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svhadd_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_s64,_z,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svhadd_n_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_s64,_z,)(pg, svundef_s64(), i64); - 
// expected-error@+2 {{'svhadd_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svhadd_n_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_s64,_x,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svmovlb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmovlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmovlb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmovlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmovlb,_s64,,)(svundef_s32()); - // expected-error@+2 {{'svstnt1_scatter_u64base_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64base_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u64base, , _s64)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svstnt1_scatter_s64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_s64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, s64, offset, _s64)(pg, i64_ptr, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svstnt1_scatter_u64offset_s64' needs target feature sve2}} - // overload-error@+1 
{{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, u64, offset, _s64)(pg, i64_ptr, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svstnt1_scatter_u64base_offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64base_offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _offset, _s64)(pg, svundef_u64(), i64, svundef_s64()); - // expected-error@+2 {{'svstnt1_scatter_s64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_s64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, s64, index, _s64)(pg, i64_ptr, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svstnt1_scatter_u64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, u64, index, _s64)(pg, i64_ptr, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svstnt1_scatter_u64base_index_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64base_index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _index, 
_s64)(pg, svundef_u64(), i64, svundef_s64()); - // expected-error@+2 {{'svqrdmlsh_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlsh_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlsh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlsh,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqrdmlsh_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlsh_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlsh' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlsh,_n_s64,,)(svundef_s64(), svundef_s64(), i64); - // expected-error@+2 {{'svqdmlslt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqdmlslt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmlslt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmlslt,_n_s64,,)(svundef_s64(), svundef_s32(), i32); - // expected-error@+2 {{'svmaxp_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svmaxp_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_x' 
needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svmullt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svmullt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 {{'svldnt1sh_gather_u64base_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u64base_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _s64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1sh_gather_s64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_s64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather_, s64, offset_s64, )(pg, const_i16_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1sh_gather_u64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 
{{'svldnt1sh_gather_u64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather_, u64, offset_s64, )(pg, const_i16_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1sh_gather_u64base_offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u64base_offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _offset_s64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svldnt1sh_gather_s64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_s64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather_, s64, index_s64, )(pg, const_i16_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1sh_gather_u64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather_, u64, index_s64, )(pg, const_i16_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1sh_gather_u64base_index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u64base_index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _index_s64, )(pg, svundef_u64(), i64); - // expected-error@+2 
{{'svqxtunb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtunb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtunb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtunb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtunb,_s64,,)(svundef_s64()); - // expected-error@+2 {{'svwhilerw_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilerw_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilerw,_s64,,)(const_i64_ptr, const_i64_ptr); - // expected-error@+2 {{'svrhadd_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svrhadd_n_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_s64,_m,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svrhadd_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_s64,_z,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svrhadd_n_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_s64_z' needs target feature (sve,sve2)|sme}} + // 
overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_s64,_z,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svrhadd_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svrhadd_n_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_s64,_x,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svraddhnb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnb,_s64,,)(svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svraddhnb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnb,_n_s64,,)(svundef_s64(), i64); - // expected-error@+2 {{'svwhilewr_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilewr_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilewr,_s64,,)(const_i64_ptr, const_i64_ptr); - // expected-error@+2 {{'svmlalb_s64' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svmlalb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb,_n_s64,,)(svundef_s64(), svundef_s32(), i32); - // expected-error@+2 {{'svldnt1sb_gather_u64base_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sb_gather_u64base_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather, _u64base, _s64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1sb_gather_s64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sb_gather_s64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather_, s64, offset_s64, )(pg, const_i8_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1sb_gather_u64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sb_gather_u64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather_, u64, offset_s64, )(pg, const_i8_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1sb_gather_u64base_offset_s64' needs target feature 
sve2}} - // overload-error@+1 {{'svldnt1sb_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sb_gather_u64base_offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather, _u64base, _offset_s64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svsubwb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwb,_s64,,)(svundef_s64(), svundef_s32()); - // expected-error@+2 {{'svsubwb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwb,_n_s64,,)(svundef_s64(), i32); - // expected-error@+2 {{'svldnt1ub_gather_u64base_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1ub_gather_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_u64base_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather, _u64base, _s64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1ub_gather_s64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1ub_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_s64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather_, s64, offset_s64, )(pg, const_u8_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1ub_gather_u64offset_s64' needs target feature sve2}} - // overload-error@+1 
{{'svldnt1ub_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_u64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather_, u64, offset_s64, )(pg, const_u8_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1ub_gather_u64base_offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1ub_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_u64base_offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather, _u64base, _offset_s64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svaba_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svaba_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_n_s64,,)(svundef_s64(), svundef_s64(), i64); - // expected-error@+2 {{'svraddhnt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnt,_s64,,)(svundef_s32(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svraddhnt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svraddhnt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnt,_n_s64,,)(svundef_s32(), svundef_s64(), i64); - // expected-error@+2 {{'svuqadd_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_s64,_m,)(pg, svundef_s64(), svundef_u64()); - // expected-error@+2 {{'svuqadd_n_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_n_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_n_s64,_m,)(pg, svundef_s64(), u64); - // expected-error@+2 {{'svuqadd_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_s64,_z,)(pg, svundef_s64(), svundef_u64()); - // expected-error@+2 {{'svuqadd_n_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_n_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_n_s64,_z,)(pg, svundef_s64(), u64); - // expected-error@+2 {{'svuqadd_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_s64,_x,)(pg, 
svundef_s64(), svundef_u64()); - // expected-error@+2 {{'svuqadd_n_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svuqadd_n_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svuqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svuqadd,_n_s64,_x,)(pg, svundef_s64(), u64); - // expected-error@+2 {{'sveorbt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'sveorbt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_n_s64,,)(svundef_s64(), svundef_s64(), i64); - // expected-error@+2 {{'svldnt1sw_gather_u64base_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_u64base_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _s64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1sw_gather_s64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_s64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather_, s64, offset_s64, )(pg, const_i32_ptr, svundef_s64()); - // expected-error@+2 
{{'svldnt1sw_gather_u64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_u64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather_, u64, offset_s64, )(pg, const_i32_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1sw_gather_u64base_offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_u64base_offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _offset_s64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svldnt1sw_gather_s64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_s64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather_, s64, index_s64, )(pg, const_i32_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1sw_gather_u64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_u64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather_, u64, index_s64, )(pg, const_i32_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1sw_gather_u64base_index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_u64base_index_s64' needs target feature sve,sve2}} + // overload-error@+1 
{{'svldnt1sw_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _index_s64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svbsl_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svbsl_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_n_s64,,)(svundef_s64(), svundef_s64(), i64); - // expected-error@+2 {{'svsubltb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubltb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubltb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubltb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubltb,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svsubltb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubltb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubltb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubltb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubltb,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 {{'svhsub_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_s64,_z,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svhsub_s64_m' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svhsub_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svhsub_n_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_s64,_z,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svhsub_n_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_s64,_m,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svhsub_n_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_s64,_x,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svldnt1_gather_u64base_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64base_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_s64' needs 
target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u64base, _s64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1_gather_s64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_s64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, s64, offset, _s64)(pg, const_i64_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1_gather_u64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, u64, offset, _s64)(pg, const_i64_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1_gather_u64base_offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64base_offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u64base, _offset_s64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svldnt1_gather_s64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_index' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_s64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, s64, index, _s64)(pg, const_i64_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1_gather_u64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_index' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64index_s64' needs target feature 
sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, u64, index, _s64)(pg, const_i64_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1_gather_u64base_index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64base_index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u64base, _index_s64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svaddlb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlb,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svaddlb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlb,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 {{'svqrdmlah_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlah_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlah' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrdmlah,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqrdmlah_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrdmlah_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrdmlah' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svqrdmlah,_n_s64,,)(svundef_s64(), svundef_s64(), i64); - // expected-error@+2 {{'svqdmullb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullb,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svqdmullb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqdmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqdmullb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqdmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqdmullb,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 {{'svldnt1uw_gather_u64base_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_u64base_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather, _u64base, _s64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1uw_gather_s64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_s64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather_, s64, offset_s64, )(pg, const_u32_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1uw_gather_u64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_u64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather_, u64, 
offset_s64, )(pg, const_u32_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1uw_gather_u64base_offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_offset_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_u64base_offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_offset_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather, _u64base, _offset_s64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svldnt1uw_gather_s64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_s64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather_, s64, index_s64, )(pg, const_u32_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1uw_gather_u64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_u64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather_, u64, index_s64, )(pg, const_u32_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1uw_gather_u64base_index_s64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_index_s64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_u64base_index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_index_s64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather, _u64base, _index_s64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svstnt1h_scatter_u64base_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u64base_s64' needs 
target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter, _u64base, , _s64)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svstnt1h_scatter_s64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_s64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter_, s64, offset, _s64)(pg, i16_ptr, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svstnt1h_scatter_u64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter_, u64, offset, _s64)(pg, i16_ptr, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svstnt1h_scatter_u64base_offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u64base_offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter, _u64base, _offset, _s64)(pg, svundef_u64(), i64, svundef_s64()); - // expected-error@+2 {{'svstnt1h_scatter_s64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_s64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter_, s64, index, _s64)(pg, i16_ptr, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svstnt1h_scatter_u64index_s64' needs target 
feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter_, u64, index, _s64)(pg, i16_ptr, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svstnt1h_scatter_u64base_index_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u64base_index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter, _u64base, _index, _s64)(pg, svundef_u64(), i64, svundef_s64()); - // expected-error@+2 {{'svstnt1b_scatter_u64base_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_u64base_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter, _u64base, , _s64)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svstnt1b_scatter_s64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_s64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter_, s64, offset, _s64)(pg, i8_ptr, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svstnt1b_scatter_u64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_u64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve,sve2}} 
SVE_ACLE_FUNC(svstnt1b_scatter_, u64, offset, _s64)(pg, i8_ptr, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svstnt1b_scatter_u64base_offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_u64base_offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter, _u64base, _offset, _s64)(pg, svundef_u64(), i64, svundef_s64()); - // expected-error@+2 {{'svbsl2n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svbsl2n_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_n_s64,,)(svundef_s64(), svundef_s64(), i64); - // expected-error@+2 {{'svaddlt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlt,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svaddlt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlt,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 
{{'svstnt1w_scatter_u64base_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_u64base_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, , _s64)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svstnt1w_scatter_s64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_s64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter_, s64, offset, _s64)(pg, i32_ptr, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svstnt1w_scatter_u64offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_u64offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter_, u64, offset, _s64)(pg, i32_ptr, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svstnt1w_scatter_u64base_offset_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_u64base_offset_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, _offset, _s64)(pg, svundef_u64(), i64, svundef_s64()); - // expected-error@+2 {{'svstnt1w_scatter_s64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_s64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter_index' 
needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter_, s64, index, _s64)(pg, i32_ptr, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svstnt1w_scatter_u64index_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_u64index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter_, u64, index, _s64)(pg, i32_ptr, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svstnt1w_scatter_u64base_index_s64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_u64base_index_s64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, _index, _s64)(pg, svundef_u64(), i64, svundef_s64()); - // expected-error@+2 {{'svqxtunt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtunt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtunt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtunt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtunt,_s64,,)(svundef_u32(), svundef_s64()); - // expected-error@+2 {{'svabalb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalb,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svabalb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalb' needs target feature 
(sve,sve2)|sme}} SVE_ACLE_FUNC(svabalb,_n_s64,,)(svundef_s64(), svundef_s32(), i32); - // expected-error@+2 {{'svsublb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublb,_s64,,)(svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svsublb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublb,_n_s64,,)(svundef_s32(), i32); - // expected-error@+2 {{'svbsl1n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svbsl1n_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_n_s64,,)(svundef_s64(), svundef_s64(), i64); - // expected-error@+2 {{'svrshl_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_s64,_z,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svrshl_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature 
sve2|sme}} + // expected-error@+2 {{'svrshl_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svrshl_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svrshl_n_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_s64,_z,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svrshl_n_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_s64,_m,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svrshl_n_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_s64,_x,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svaddwt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwt,_s64,,)(svundef_s64(), svundef_s32()); - 
// expected-error@+2 {{'svaddwt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwt,_n_s64,,)(svundef_s64(), i32); - // expected-error@+2 {{'svmlslb_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svmlslb_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb,_n_s64,,)(svundef_s64(), svundef_s32(), i32); - // expected-error@+2 {{'svmlslt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32()); - // expected-error@+2 {{'svmlslt_n_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_n_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslt,_n_s64,,)(svundef_s64(), svundef_s32(), i32); - // expected-error@+2 {{'svqneg_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqneg_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqneg_s64_z' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svqneg_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqneg,_s64,_z,)(pg, svundef_s64()); - // expected-error@+2 {{'svqneg_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqneg_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqneg_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqneg_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqneg,_s64,_m,)(svundef_s64(), pg, svundef_s64()); - // expected-error@+2 {{'svqneg_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqneg_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqneg_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqneg_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqneg,_s64,_x,)(pg, svundef_s64()); - // expected-error@+2 {{'svmovlt_s64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmovlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmovlt_s64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmovlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmovlt,_s64,,)(svundef_s32()); - // expected-error@+2 {{'svqshl_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_s64,_z,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqshl_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_s64,_m,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqshl_s64_x' needs target feature sve2|sme}} - // overload-error@+1 
{{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_s64,_x,)(pg, svundef_s64(), svundef_s64()); - // expected-error@+2 {{'svqshl_n_s64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_s64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_s64,_z,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svqshl_n_s64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_s64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_s64,_m,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svqshl_n_s64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_s64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_s64,_x,)(pg, svundef_s64(), i64); - // expected-error@+2 {{'svhistseg_u8' needs target feature sve2}} - // overload-error@+1 {{'svhistseg' needs target feature sve2}} + // expected-error@+2 {{'svhistseg_u8' needs target feature sve,sve2}} + // overload-error@+1 {{'svhistseg' needs target feature sve,sve2}} SVE_ACLE_FUNC(svhistseg,_u8,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svpmullb_pair_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullb_pair' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullb_pair_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullb_pair' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svpmullb_pair,_u8,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svpmullb_pair_n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullb_pair' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullb_pair_n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullb_pair' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullb_pair,_n_u8,,)(svundef_u8(), u8); - // expected-error@+2 {{'svnbsl_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svnbsl_n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_n_u8,,)(svundef_u8(), svundef_u8(), u8); - // expected-error@+2 {{'svtbl2_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbl2_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbl2,_u8,,)(svundef2_u8(), svundef_u8()); - // expected-error@+2 {{'svhsubr_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_u8,_z,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svhsubr_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 
{{'svhsubr_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_u8,_m,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svhsubr_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_u8,_x,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svhsubr_n_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_u8,_z,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svhsubr_n_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_u8,_m,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svhsubr_n_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_u8,_x,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svpmul_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmul' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmul_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmul' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmul,_u8,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svpmul_n_u8' needs 
target feature sve2|sme}} - // overload-error@+1 {{'svpmul' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmul_n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmul' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmul,_n_u8,,)(svundef_u8(), u8); - // expected-error@+2 {{'sveortb_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'sveortb_n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_n_u8,,)(svundef_u8(), svundef_u8(), u8); - // expected-error@+2 {{'svbcax_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svbcax_n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_n_u8,,)(svundef_u8(), svundef_u8(), u8); - // expected-error@+2 {{'svqrshl_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svqrshl,_u8,_z,)(pg, svundef_u8(), svundef_s8()); - // expected-error@+2 {{'svqrshl_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_u8,_m,)(pg, svundef_u8(), svundef_s8()); - // expected-error@+2 {{'svqrshl_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_u8,_x,)(pg, svundef_u8(), svundef_s8()); - // expected-error@+2 {{'svqrshl_n_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_u8,_z,)(pg, svundef_u8(), i8); - // expected-error@+2 {{'svqrshl_n_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_u8,_m,)(pg, svundef_u8(), i8); - // expected-error@+2 {{'svqrshl_n_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_u8,_x,)(pg, svundef_u8(), i8); - // expected-error@+2 {{'svpmullt_pair_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullt_pair' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svpmullt_pair_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullt_pair' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullt_pair,_u8,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svpmullt_pair_n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullt_pair' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullt_pair_n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullt_pair' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullt_pair,_n_u8,,)(svundef_u8(), u8); - // expected-error@+2 {{'svqsubr_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_u8,_z,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svqsubr_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_u8,_m,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svqsubr_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_u8,_x,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svqsubr_n_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_u8,_z,)(pg, 
svundef_u8(), u8); - // expected-error@+2 {{'svqsubr_n_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_u8,_m,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svqsubr_n_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_u8,_x,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svaddp_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_u8,_m,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svaddp_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_u8,_x,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svqadd_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_u8,_m,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svqadd_n_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_u8_m' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_u8,_m,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svqadd_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_u8,_z,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svqadd_n_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_u8,_z,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svqadd_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_u8,_x,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svqadd_n_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_u8,_x,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svtbx_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbx' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbx_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbx' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbx,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svminp_u8_m' needs target feature sve2|sme}} - // overload-error@+1 
{{'svminp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_u8,_m,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svminp_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_u8,_x,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svsqadd_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_u8,_m,)(pg, svundef_u8(), svundef_s8()); - // expected-error@+2 {{'svsqadd_n_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_n_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_n_u8,_m,)(pg, svundef_u8(), i8); - // expected-error@+2 {{'svsqadd_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_u8,_z,)(pg, svundef_u8(), svundef_s8()); - // expected-error@+2 {{'svsqadd_n_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_n_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_z' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svsqadd,_n_u8,_z,)(pg, svundef_u8(), i8); - // expected-error@+2 {{'svsqadd_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_u8,_x,)(pg, svundef_u8(), svundef_s8()); - // expected-error@+2 {{'svsqadd_n_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_n_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_n_u8,_x,)(pg, svundef_u8(), i8); - // expected-error@+2 {{'svqsub_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_u8,_z,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svqsub_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_u8,_m,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svqsub_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_u8,_x,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svqsub_n_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 
{{'svqsub_n_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_u8,_z,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svqsub_n_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_u8,_m,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svqsub_n_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_u8,_x,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'sveor3_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'sveor3_n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_n_u8,,)(svundef_u8(), svundef_u8(), u8); - // expected-error@+2 {{'svhadd_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_u8,_m,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svhadd_n_u8_m' needs target feature 
sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_u8,_m,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svhadd_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_u8,_z,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svhadd_n_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_u8,_z,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svhadd_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_u8,_x,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svhadd_n_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_u8,_x,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svmaxp_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_m' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svmaxp,_u8,_m,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svmaxp_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_u8,_x,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svmatch_u8' needs target feature sve2}} - // overload-error@+1 {{'svmatch' needs target feature sve2}} + // expected-error@+2 {{'svmatch_u8' needs target feature sve,sve2}} + // overload-error@+1 {{'svmatch' needs target feature sve,sve2}} SVE_ACLE_FUNC(svmatch,_u8,,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svwhilerw_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilerw_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilerw,_u8,,)(const_u8_ptr, const_u8_ptr); - // expected-error@+2 {{'svrhadd_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_u8,_m,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svrhadd_n_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_u8,_m,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svrhadd_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_u8_z' needs 
target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_u8,_z,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svrhadd_n_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_u8,_z,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svrhadd_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_u8,_x,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svrhadd_n_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_u8,_x,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svwhilewr_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilewr_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilewr,_u8,,)(const_u8_ptr, const_u8_ptr); - // expected-error@+2 {{'svnmatch_u8' needs target feature sve2}} - // overload-error@+1 {{'svnmatch' needs target feature sve2}} + // expected-error@+2 {{'svnmatch_u8' needs target feature sve,sve2}} + // overload-error@+1 {{'svnmatch' needs target feature sve,sve2}} SVE_ACLE_FUNC(svnmatch,_u8,,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svaba_u8' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svaba_n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_n_u8,,)(svundef_u8(), svundef_u8(), u8); - // expected-error@+2 {{'sveorbt_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'sveorbt_n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_n_u8,,)(svundef_u8(), svundef_u8(), u8); - // expected-error@+2 {{'svbsl_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svbsl_n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svbsl,_n_u8,,)(svundef_u8(), svundef_u8(), u8); - // expected-error@+2 {{'svhsub_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_u8,_z,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svhsub_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_u8,_m,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svhsub_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_u8,_x,)(pg, svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svhsub_n_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_u8,_z,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svhsub_n_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_u8,_m,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svhsub_n_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 
{{'svhsub_n_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_u8,_x,)(pg, svundef_u8(), u8); - // expected-error@+2 {{'svbsl2n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svbsl2n_n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_n_u8,,)(svundef_u8(), svundef_u8(), u8); - // expected-error@+2 {{'svbsl1n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svbsl1n_n_u8' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_n_u8' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_n_u8,,)(svundef_u8(), svundef_u8(), u8); - // expected-error@+2 {{'svrshl_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_u8,_z,)(pg, svundef_u8(), svundef_s8()); - // expected-error@+2 
{{'svrshl_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_u8,_m,)(pg, svundef_u8(), svundef_s8()); - // expected-error@+2 {{'svrshl_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_u8,_x,)(pg, svundef_u8(), svundef_s8()); - // expected-error@+2 {{'svrshl_n_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_u8,_z,)(pg, svundef_u8(), i8); - // expected-error@+2 {{'svrshl_n_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_u8,_m,)(pg, svundef_u8(), i8); - // expected-error@+2 {{'svrshl_n_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_u8,_x,)(pg, svundef_u8(), i8); - // expected-error@+2 {{'svqshl_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target 
feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_u8,_z,)(pg, svundef_u8(), svundef_s8()); - // expected-error@+2 {{'svqshl_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_u8,_m,)(pg, svundef_u8(), svundef_s8()); - // expected-error@+2 {{'svqshl_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_u8,_x,)(pg, svundef_u8(), svundef_s8()); - // expected-error@+2 {{'svqshl_n_u8_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_u8_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_u8,_z,)(pg, svundef_u8(), i8); - // expected-error@+2 {{'svqshl_n_u8_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_u8_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_u8,_m,)(pg, svundef_u8(), i8); - // expected-error@+2 {{'svqshl_n_u8_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_u8_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_u8,_x,)(pg, svundef_u8(), i8); - // expected-error@+2 {{'svmullb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullb' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svmullb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb,_u16,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svmullb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb,_n_u16,,)(svundef_u8(), u8); - // expected-error@+2 {{'svpmullb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullb,_u16,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svpmullb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullb,_n_u16,,)(svundef_u8(), u8); - // expected-error@+2 {{'svaddwb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwb,_u16,,)(svundef_u16(), svundef_u8()); - // expected-error@+2 {{'svaddwb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwb,_n_u16,,)(svundef_u16(), u8); - // expected-error@+2 {{'svsubhnb_u16' needs target feature 
sve2|sme}} - // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnb,_u16,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svsubhnb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnb,_n_u16,,)(svundef_u16(), u16); - // expected-error@+2 {{'svrsubhnt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnt,_u16,,)(svundef_u8(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svrsubhnt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnt,_n_u16,,)(svundef_u8(), svundef_u16(), u16); - // expected-error@+2 {{'svnbsl_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svnbsl_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs 
target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_n_u16,,)(svundef_u16(), svundef_u16(), u16); - // expected-error@+2 {{'svsubhnt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnt,_u16,,)(svundef_u8(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svsubhnt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnt,_n_u16,,)(svundef_u8(), svundef_u16(), u16); - // expected-error@+2 {{'svtbl2_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbl2_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbl2,_u16,,)(svundef2_u16(), svundef_u16()); - // expected-error@+2 {{'svhsubr_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_u16,_z,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svhsubr_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_u16,_m,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svhsubr_u16_x' needs target feature sve2|sme}} - // overload-error@+1 
{{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_u16,_x,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svhsubr_n_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_u16,_z,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svhsubr_n_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_u16,_m,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svhsubr_n_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_u16,_x,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'sveortb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'sveortb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(sveortb,_n_u16,,)(svundef_u16(), svundef_u16(), u16); - // expected-error@+2 {{'svqxtnb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtnb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtnb,_u16,,)(svundef_u16()); - // expected-error@+2 {{'svmlalt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt,_u16,,)(svundef_u16(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svmlalt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt,_n_u16,,)(svundef_u16(), svundef_u8(), u8); - // expected-error@+2 {{'svaddhnt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnt,_u16,,)(svundef_u8(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svaddhnt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnt,_n_u16,,)(svundef_u8(), svundef_u16(), u16); - // expected-error@+2 {{'svbcax_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svbcax_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svbcax_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_n_u16,,)(svundef_u16(), svundef_u16(), u16); - // expected-error@+2 {{'svqxtnt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtnt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtnt,_u16,,)(svundef_u8(), svundef_u16()); - // expected-error@+2 {{'svqrshl_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_u16,_z,)(pg, svundef_u16(), svundef_s16()); - // expected-error@+2 {{'svqrshl_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_u16,_m,)(pg, svundef_u16(), svundef_s16()); - // expected-error@+2 {{'svqrshl_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_u16,_x,)(pg, svundef_u16(), 
svundef_s16()); - // expected-error@+2 {{'svqrshl_n_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_u16,_z,)(pg, svundef_u16(), i16); - // expected-error@+2 {{'svqrshl_n_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_u16,_m,)(pg, svundef_u16(), i16); - // expected-error@+2 {{'svqrshl_n_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_u16,_x,)(pg, svundef_u16(), i16); - // expected-error@+2 {{'svsublt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublt,_u16,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svsublt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublt,_n_u16,,)(svundef_u8(), u8); - // expected-error@+2 {{'svadalp_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_u16_z' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_u16,_z,)(pg, svundef_u16(), svundef_u8()); - // expected-error@+2 {{'svadalp_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_u16,_m,)(pg, svundef_u16(), svundef_u8()); - // expected-error@+2 {{'svadalp_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_u16,_x,)(pg, svundef_u16(), svundef_u8()); - // expected-error@+2 {{'svpmullt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullt,_u16,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svpmullt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullt,_n_u16,,)(svundef_u8(), u8); - // expected-error@+2 {{'svsubwt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwt,_u16,,)(svundef_u16(), svundef_u8()); - // expected-error@+2 {{'svsubwt_n_u16' needs target feature 
sve2|sme}} - // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwt,_n_u16,,)(svundef_u16(), u8); - // expected-error@+2 {{'svqsubr_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_u16,_z,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svqsubr_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_u16,_m,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svqsubr_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_u16,_x,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svqsubr_n_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_u16,_z,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svqsubr_n_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs 
target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_u16,_m,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svqsubr_n_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_u16,_x,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svaddp_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_u16,_m,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svaddp_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_u16,_x,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svqadd_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_u16,_m,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svqadd_n_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_u16,_m,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svqadd_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target 
feature sve2|sme}} + // expected-error@+2 {{'svqadd_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_u16,_z,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svqadd_n_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_u16,_z,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svqadd_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_u16,_x,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svqadd_n_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_u16,_x,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svabdlb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlb,_u16,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svabdlb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlb,_n_u16,,)(svundef_u8(), u8); - // 
expected-error@+2 {{'svtbx_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbx' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbx_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbx' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbx,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svabdlt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlt,_u16,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svabdlt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlt,_n_u16,,)(svundef_u8(), u8); - // expected-error@+2 {{'svminp_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_u16,_m,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svminp_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_u16,_x,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svsqadd_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 
{{'svsqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_u16,_m,)(pg, svundef_u16(), svundef_s16()); - // expected-error@+2 {{'svsqadd_n_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_n_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_n_u16,_m,)(pg, svundef_u16(), i16); - // expected-error@+2 {{'svsqadd_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_u16,_z,)(pg, svundef_u16(), svundef_s16()); - // expected-error@+2 {{'svsqadd_n_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_n_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_n_u16,_z,)(pg, svundef_u16(), i16); - // expected-error@+2 {{'svsqadd_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_u16,_x,)(pg, svundef_u16(), svundef_s16()); - // expected-error@+2 {{'svsqadd_n_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_n_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_n_u16,_x,)(pg, svundef_u16(), i16); - // expected-error@+2 {{'svqsub_u16_z' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_u16,_z,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svqsub_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_u16,_m,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svqsub_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_u16,_x,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svqsub_n_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_u16,_z,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svqsub_n_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_u16,_m,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svqsub_n_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature 
(sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_u16,_x,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svrsubhnb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnb,_u16,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svrsubhnb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnb,_n_u16,,)(svundef_u16(), u16); - // expected-error@+2 {{'svaddhnb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnb,_u16,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svaddhnb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnb,_n_u16,,)(svundef_u16(), u16); - // expected-error@+2 {{'svabalt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalt,_u16,,)(svundef_u16(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svabalt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalt' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svabalt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalt,_n_u16,,)(svundef_u16(), svundef_u8(), u8); - // expected-error@+2 {{'sveor3_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'sveor3_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_n_u16,,)(svundef_u16(), svundef_u16(), u16); - // expected-error@+2 {{'svhadd_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_u16,_m,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svhadd_n_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_u16,_m,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svhadd_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_u16,_z,)(pg, svundef_u16(), svundef_u16()); - 
// expected-error@+2 {{'svhadd_n_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_u16,_z,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svhadd_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_u16,_x,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svhadd_n_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_u16,_x,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svmovlb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmovlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmovlb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmovlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmovlb,_u16,,)(svundef_u8()); - // expected-error@+2 {{'svmaxp_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_u16,_m,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svmaxp_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 
{{'svmaxp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_u16,_x,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svmullt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt,_u16,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svmullt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt,_n_u16,,)(svundef_u8(), u8); - // expected-error@+2 {{'svmatch_u16' needs target feature sve2}} - // overload-error@+1 {{'svmatch' needs target feature sve2}} + // expected-error@+2 {{'svmatch_u16' needs target feature sve,sve2}} + // overload-error@+1 {{'svmatch' needs target feature sve,sve2}} SVE_ACLE_FUNC(svmatch,_u16,,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svwhilerw_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilerw_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilerw,_u16,,)(const_u16_ptr, const_u16_ptr); - // expected-error@+2 {{'svrhadd_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_u16,_m,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svrhadd_n_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature 
sve2|sme}} + // expected-error@+2 {{'svrhadd_n_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_u16,_m,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svrhadd_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_u16,_z,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svrhadd_n_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_u16,_z,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svrhadd_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_u16,_x,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svrhadd_n_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_u16,_x,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svraddhnb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnb' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svraddhnb,_u16,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svraddhnb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnb,_n_u16,,)(svundef_u16(), u16); - // expected-error@+2 {{'svwhilewr_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilewr_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilewr,_u16,,)(const_u16_ptr, const_u16_ptr); - // expected-error@+2 {{'svmlalb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb,_u16,,)(svundef_u16(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svmlalb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb,_n_u16,,)(svundef_u16(), svundef_u8(), u8); - // expected-error@+2 {{'svsubwb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwb,_u16,,)(svundef_u16(), svundef_u8()); - // expected-error@+2 {{'svsubwb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svsubwb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwb,_n_u16,,)(svundef_u16(), u8); - // expected-error@+2 {{'svnmatch_u16' needs target feature sve2}} - // overload-error@+1 {{'svnmatch' needs target feature sve2}} + // expected-error@+2 {{'svnmatch_u16' needs target feature sve,sve2}} + // overload-error@+1 {{'svnmatch' needs target feature sve,sve2}} SVE_ACLE_FUNC(svnmatch,_u16,,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svaba_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svaba_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_n_u16,,)(svundef_u16(), svundef_u16(), u16); - // expected-error@+2 {{'svraddhnt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnt,_u16,,)(svundef_u8(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svraddhnt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnt,_n_u16,,)(svundef_u8(), svundef_u16(), u16); - // 
expected-error@+2 {{'sveorbt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'sveorbt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_n_u16,,)(svundef_u16(), svundef_u16(), u16); - // expected-error@+2 {{'svbsl_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svbsl_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_n_u16,,)(svundef_u16(), svundef_u16(), u16); - // expected-error@+2 {{'svhsub_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_u16,_z,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svhsub_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_u16_m' needs target feature (sve,sve2)|sme}} 
+ // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_u16,_m,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svhsub_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_u16,_x,)(pg, svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svhsub_n_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_u16,_z,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svhsub_n_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_u16,_m,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svhsub_n_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_u16,_x,)(pg, svundef_u16(), u16); - // expected-error@+2 {{'svaddlb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlb,_u16,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svaddlb_n_u16' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svaddlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlb,_n_u16,,)(svundef_u8(), u8); - // expected-error@+2 {{'svbsl2n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svbsl2n_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_n_u16,,)(svundef_u16(), svundef_u16(), u16); - // expected-error@+2 {{'svaddlt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlt,_u16,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svaddlt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlt,_n_u16,,)(svundef_u8(), u8); - // expected-error@+2 {{'svabalb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalb' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svabalb,_u16,,)(svundef_u16(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svabalb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalb,_n_u16,,)(svundef_u16(), svundef_u8(), u8); - // expected-error@+2 {{'svsublb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublb,_u16,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svsublb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublb,_n_u16,,)(svundef_u8(), u8); - // expected-error@+2 {{'svbsl1n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svbsl1n_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_n_u16,,)(svundef_u16(), svundef_u16(), u16); - // expected-error@+2 {{'svrshl_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svrshl_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_u16,_z,)(pg, svundef_u16(), svundef_s16()); - // expected-error@+2 {{'svrshl_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_u16,_m,)(pg, svundef_u16(), svundef_s16()); - // expected-error@+2 {{'svrshl_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_u16,_x,)(pg, svundef_u16(), svundef_s16()); - // expected-error@+2 {{'svrshl_n_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_u16,_z,)(pg, svundef_u16(), i16); - // expected-error@+2 {{'svrshl_n_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_u16,_m,)(pg, svundef_u16(), i16); - // expected-error@+2 {{'svrshl_n_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_u16,_x,)(pg, svundef_u16(), i16); - // 
expected-error@+2 {{'svaddwt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwt,_u16,,)(svundef_u16(), svundef_u8()); - // expected-error@+2 {{'svaddwt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwt_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwt,_n_u16,,)(svundef_u16(), u8); - // expected-error@+2 {{'svmlslb_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb,_u16,,)(svundef_u16(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svmlslb_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_n_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb,_n_u16,,)(svundef_u16(), svundef_u8(), u8); - // expected-error@+2 {{'svmlslt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslt,_u16,,)(svundef_u16(), svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svmlslt_n_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_n_u16' needs target feature (sve,sve2)|sme}} + // 
overload-error@+1 {{'svmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslt,_n_u16,,)(svundef_u16(), svundef_u8(), u8); - // expected-error@+2 {{'svmovlt_u16' needs target feature sve2|sme}} - // overload-error@+1 {{'svmovlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmovlt_u16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmovlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmovlt,_u16,,)(svundef_u8()); - // expected-error@+2 {{'svqshl_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_u16,_z,)(pg, svundef_u16(), svundef_s16()); - // expected-error@+2 {{'svqshl_u16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_u16,_m,)(pg, svundef_u16(), svundef_s16()); - // expected-error@+2 {{'svqshl_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_u16,_x,)(pg, svundef_u16(), svundef_s16()); - // expected-error@+2 {{'svqshl_n_u16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_u16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_u16,_z,)(pg, svundef_u16(), i16); - // expected-error@+2 {{'svqshl_n_u16_m' needs target feature sve2|sme}} - // overload-error@+1 
{{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_u16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_u16,_m,)(pg, svundef_u16(), i16); - // expected-error@+2 {{'svqshl_n_u16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_u16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_u16,_x,)(pg, svundef_u16(), i16); - // expected-error@+2 {{'svmullb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb,_u32,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svmullb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb,_n_u32,,)(svundef_u16(), u16); - // expected-error@+2 {{'svpmullb_pair_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullb_pair' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullb_pair_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullb_pair' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullb_pair,_u32,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svpmullb_pair_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullb_pair' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullb_pair_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullb_pair' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svpmullb_pair,_n_u32,,)(svundef_u32(), u32); - // expected-error@+2 {{'svaddwb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwb,_u32,,)(svundef_u32(), svundef_u16()); - // expected-error@+2 {{'svaddwb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwb,_n_u32,,)(svundef_u32(), u16); - // expected-error@+2 {{'svsubhnb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnb,_u32,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svsubhnb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnb,_n_u32,,)(svundef_u32(), u32); - // expected-error@+2 {{'svrsubhnt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnt,_u32,,)(svundef_u16(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svrsubhnt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}} + // expected-error@+2 
{{'svrsubhnt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnt,_n_u32,,)(svundef_u16(), svundef_u32(), u32); - // expected-error@+2 {{'svnbsl_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svnbsl_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svsubhnt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnt,_u32,,)(svundef_u16(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svsubhnt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnt,_n_u32,,)(svundef_u16(), svundef_u32(), u32); - // expected-error@+2 {{'svwhilegt_b8_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b8' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b8_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b8' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b8,_u32,,)(u32, 
u32); - // expected-error@+2 {{'svwhilegt_b16_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b16' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b16_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b16' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b16,_u32,,)(u32, u32); - // expected-error@+2 {{'svwhilegt_b32_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b32' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b32_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b32' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b32,_u32,,)(u32, u32); - // expected-error@+2 {{'svwhilegt_b64_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b64' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b64_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b64' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b64,_u32,,)(u32, u32); - // expected-error@+2 {{'svtbl2_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbl2_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbl2,_u32,,)(svundef2_u32(), svundef_u32()); - // expected-error@+2 {{'svhsubr_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_u32,_z,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svhsubr_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_u32_m' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_u32,_m,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svhsubr_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_u32,_x,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svhsubr_n_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_u32,_z,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svhsubr_n_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_u32,_m,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svhsubr_n_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_u32,_x,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svhistcnt_u32_z' needs target feature sve2}} - // overload-error@+1 {{'svhistcnt_z' needs target feature sve2}} + // expected-error@+2 {{'svhistcnt_u32_z' needs target feature sve,sve2}} + // overload-error@+1 {{'svhistcnt_z' needs target feature sve,sve2}} SVE_ACLE_FUNC(svhistcnt,_u32,_z,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'sveortb_u32' needs 
target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'sveortb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svqxtnb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtnb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtnb,_u32,,)(svundef_u32()); - // expected-error@+2 {{'svmlalt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt,_u32,,)(svundef_u32(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svmlalt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt,_n_u32,,)(svundef_u32(), svundef_u16(), u16); - // expected-error@+2 {{'svaddhnt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnt' needs 
target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnt,_u32,,)(svundef_u16(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svaddhnt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnt,_n_u32,,)(svundef_u16(), svundef_u32(), u32); - // expected-error@+2 {{'svldnt1uh_gather_u32base_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u32base_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _u32, )(pg, svundef_u32()); - // expected-error@+2 {{'svldnt1uh_gather_u32offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_offset_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u32offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_offset_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather_, u32, offset_u32, )(pg, const_u16_ptr, svundef_u32()); - // expected-error@+2 {{'svldnt1uh_gather_u32base_offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_offset_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u32base_offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_offset_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _offset_u32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svldnt1uh_gather_u32base_index_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_index_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u32base_index_u32' needs target feature 
sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_index_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _index_u32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svbcax_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svbcax_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svqxtnt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtnt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtnt,_u32,,)(svundef_u16(), svundef_u32()); - // expected-error@+2 {{'svqrshl_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_u32,_z,)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svqrshl_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_u32,_m,)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 
{{'svqrshl_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_u32,_x,)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svqrshl_n_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_u32,_z,)(pg, svundef_u32(), i32); - // expected-error@+2 {{'svqrshl_n_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_u32,_m,)(pg, svundef_u32(), i32); - // expected-error@+2 {{'svqrshl_n_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_u32,_x,)(pg, svundef_u32(), i32); - // expected-error@+2 {{'svsublt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublt,_u32,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svsublt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublt_n_u32' needs target feature (sve,sve2)|sme}} + // 
overload-error@+1 {{'svsublt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublt,_n_u32,,)(svundef_u16(), u16); - // expected-error@+2 {{'svadalp_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_u32,_z,)(pg, svundef_u32(), svundef_u16()); - // expected-error@+2 {{'svadalp_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_u32,_m,)(pg, svundef_u32(), svundef_u16()); - // expected-error@+2 {{'svadalp_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_u32,_x,)(pg, svundef_u32(), svundef_u16()); - // expected-error@+2 {{'svwhilege_b8_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b8' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b8_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b8' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b8,_u32,,)(u32, u32); - // expected-error@+2 {{'svwhilege_b16_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b16' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b16_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b16' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b16,_u32,,)(u32, u32); - // expected-error@+2 {{'svwhilege_b32_u32' needs target feature sve2|sme}} 
- // overload-error@+1 {{'svwhilege_b32' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b32_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b32' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b32,_u32,,)(u32, u32); - // expected-error@+2 {{'svwhilege_b64_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b64' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b64_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b64' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b64,_u32,,)(u32, u32); - // expected-error@+2 {{'svpmullt_pair_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullt_pair' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullt_pair_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullt_pair' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullt_pair,_u32,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svpmullt_pair_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullt_pair' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullt_pair_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullt_pair' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullt_pair,_n_u32,,)(svundef_u32(), u32); - // expected-error@+2 {{'svsubwt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwt,_u32,,)(svundef_u32(), svundef_u16()); - // expected-error@+2 {{'svsubwt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwt' 
needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwt,_n_u32,,)(svundef_u32(), u16); - // expected-error@+2 {{'svqsubr_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_u32,_z,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svqsubr_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_u32,_m,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svqsubr_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_u32,_x,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svqsubr_n_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_u32,_z,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svqsubr_n_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_u32,_m,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svqsubr_n_u32_x' needs target feature sve2|sme}} - // overload-error@+1 
{{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_u32,_x,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svadclt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svadclt' needs target feature sve2|sme}} + // expected-error@+2 {{'svadclt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadclt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadclt,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svadclt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svadclt' needs target feature sve2|sme}} + // expected-error@+2 {{'svadclt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadclt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadclt,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svaddp_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_u32,_m,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svaddp_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_u32,_x,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svrecpe_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrecpe_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrecpe_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrecpe_z' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svrecpe,_u32,_z,)(pg, svundef_u32()); - // expected-error@+2 {{'svrecpe_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrecpe_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrecpe_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrecpe_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrecpe,_u32,_m,)(svundef_u32(), pg, svundef_u32()); - // expected-error@+2 {{'svrecpe_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrecpe_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrecpe_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrecpe_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrecpe,_u32,_x,)(pg, svundef_u32()); - // expected-error@+2 {{'svqadd_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_u32,_m,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svqadd_n_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_u32,_m,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svqadd_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_u32,_z,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svqadd_n_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 
{{'svqadd_n_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_u32,_z,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svqadd_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_u32,_x,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svqadd_n_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_u32,_x,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svabdlb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlb,_u32,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svabdlb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlb,_n_u32,,)(svundef_u16(), u16); - // expected-error@+2 {{'svtbx_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbx' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbx_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbx' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbx,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svabdlt_u32' needs 
target feature sve2|sme}} - // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlt,_u32,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svabdlt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlt,_n_u32,,)(svundef_u16(), u16); - // expected-error@+2 {{'svminp_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_u32,_m,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svminp_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_u32,_x,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svsqadd_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_u32,_m,)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svsqadd_n_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_n_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_m' needs 
target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_n_u32,_m,)(pg, svundef_u32(), i32); - // expected-error@+2 {{'svsqadd_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_u32,_z,)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svsqadd_n_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_n_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_n_u32,_z,)(pg, svundef_u32(), i32); - // expected-error@+2 {{'svsqadd_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_u32,_x,)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svsqadd_n_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_n_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_n_u32,_x,)(pg, svundef_u32(), i32); - // expected-error@+2 {{'svqsub_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_u32,_z,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svqsub_u32_m' needs target feature sve2|sme}} - // overload-error@+1 
{{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_u32,_m,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svqsub_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_u32,_x,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svqsub_n_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_u32,_z,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svqsub_n_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_u32,_m,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svqsub_n_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_u32,_x,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svrsubhnb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnb' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svrsubhnb,_u32,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svrsubhnb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnb,_n_u32,,)(svundef_u32(), u32); - // expected-error@+2 {{'svaddhnb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnb,_u32,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svaddhnb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnb,_n_u32,,)(svundef_u32(), u32); - // expected-error@+2 {{'svabalt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalt,_u32,,)(svundef_u32(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svabalt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalt,_n_u32,,)(svundef_u32(), svundef_u16(), u16); - // expected-error@+2 {{'sveor3_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // 
expected-error@+2 {{'sveor3_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'sveor3_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svhadd_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_u32,_m,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svhadd_n_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_u32,_m,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svhadd_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_u32,_z,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svhadd_n_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_u32,_z,)(pg, svundef_u32(), u32); - 
// expected-error@+2 {{'svhadd_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_u32,_x,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svhadd_n_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_u32,_x,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svmovlb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmovlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmovlb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmovlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmovlb,_u32,,)(svundef_u16()); - // expected-error@+2 {{'svstnt1_scatter_u32base_u32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u32base_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u32base, , _u32)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svstnt1_scatter_u32offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u32offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, u32, offset, _u32)(pg, u32_ptr, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svstnt1_scatter_u32base_offset_u32' needs target feature sve2}} - // overload-error@+1 
{{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u32base_offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _offset, _u32)(pg, svundef_u32(), i64, svundef_u32()); - // expected-error@+2 {{'svstnt1_scatter_u32base_index_u32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u32base_index_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _index, _u32)(pg, svundef_u32(), i64, svundef_u32()); - // expected-error@+2 {{'svmaxp_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_u32,_m,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svmaxp_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_u32,_x,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svsbclt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsbclt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsbclt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsbclt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsbclt,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svsbclt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsbclt' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svsbclt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsbclt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsbclt,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svmullt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt,_u32,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svmullt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt,_n_u32,,)(svundef_u16(), u16); - // expected-error@+2 {{'svldnt1sh_gather_u32base_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u32base_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _u32, )(pg, svundef_u32()); - // expected-error@+2 {{'svldnt1sh_gather_u32offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_offset_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u32offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_offset_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather_, u32, offset_u32, )(pg, const_i16_ptr, svundef_u32()); - // expected-error@+2 {{'svldnt1sh_gather_u32base_offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_offset_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u32base_offset_u32' needs target feature 
sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_offset_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _offset_u32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svldnt1sh_gather_u32base_index_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_index_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u32base_index_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_index_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _index_u32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svwhilerw_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilerw_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilerw,_u32,,)(const_u32_ptr, const_u32_ptr); - // expected-error@+2 {{'svrhadd_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_u32,_m,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svrhadd_n_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_u32,_m,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svrhadd_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svrhadd,_u32,_z,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svrhadd_n_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_u32,_z,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svrhadd_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_u32,_x,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svrhadd_n_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_u32,_x,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svraddhnb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnb,_u32,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svraddhnb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnb,_n_u32,,)(svundef_u32(), u32); - // expected-error@+2 {{'svwhilewr_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilewr' needs target feature 
sve2|sme}} + // expected-error@+2 {{'svwhilewr_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilewr,_u32,,)(const_u32_ptr, const_u32_ptr); - // expected-error@+2 {{'svmlalb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb,_u32,,)(svundef_u32(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svmlalb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb,_n_u32,,)(svundef_u32(), svundef_u16(), u16); - // expected-error@+2 {{'svldnt1sb_gather_u32base_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sb_gather_u32base_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather, _u32base, _u32, )(pg, svundef_u32()); - // expected-error@+2 {{'svldnt1sb_gather_u32offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_offset_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sb_gather_u32offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_offset_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather_, u32, offset_u32, )(pg, const_i8_ptr, svundef_u32()); - // expected-error@+2 {{'svldnt1sb_gather_u32base_offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_offset_u32' needs target feature sve2}} + // expected-error@+2 
{{'svldnt1sb_gather_u32base_offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_offset_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather, _u32base, _offset_u32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svsubwb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwb,_u32,,)(svundef_u32(), svundef_u16()); - // expected-error@+2 {{'svsubwb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwb,_n_u32,,)(svundef_u32(), u16); - // expected-error@+2 {{'svldnt1ub_gather_u32base_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1ub_gather_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_u32base_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather, _u32base, _u32, )(pg, svundef_u32()); - // expected-error@+2 {{'svldnt1ub_gather_u32offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1ub_gather_offset_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_u32offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_offset_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather_, u32, offset_u32, )(pg, const_u8_ptr, svundef_u32()); - // expected-error@+2 {{'svldnt1ub_gather_u32base_offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1ub_gather_offset_u32' needs target feature sve2}} + // expected-error@+2 
{{'svldnt1ub_gather_u32base_offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_offset_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather, _u32base, _offset_u32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svaba_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svaba_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svraddhnt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnt,_u32,,)(svundef_u16(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svraddhnt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnt,_n_u32,,)(svundef_u16(), svundef_u32(), u32); - // expected-error@+2 {{'sveorbt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(sveorbt,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'sveorbt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svbsl_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svbsl_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svadclb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svadclb' needs target feature sve2|sme}} + // expected-error@+2 {{'svadclb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadclb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadclb,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svadclb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svadclb' needs target feature sve2|sme}} + // expected-error@+2 {{'svadclb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadclb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadclb,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svhsub_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature 
sve2|sme}} + // expected-error@+2 {{'svhsub_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_u32,_z,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svhsub_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_u32,_m,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svhsub_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_u32,_x,)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svhsub_n_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_u32,_z,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svhsub_n_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_u32,_m,)(pg, svundef_u32(), u32); - // expected-error@+2 {{'svhsub_n_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_u32,_x,)(pg, 
svundef_u32(), u32); - // expected-error@+2 {{'svldnt1_gather_u32base_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u32base_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u32base, _u32, )(pg, svundef_u32()); - // expected-error@+2 {{'svldnt1_gather_u32offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u32offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, u32, offset, _u32)(pg, const_u32_ptr, svundef_u32()); - // expected-error@+2 {{'svldnt1_gather_u32base_offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u32base_offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u32base, _offset_u32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svldnt1_gather_u32base_index_u32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_index_u32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u32base_index_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_index_u32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u32base, _index_u32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svaddlb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlb' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svaddlb,_u32,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svaddlb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlb,_n_u32,,)(svundef_u16(), u16); - // expected-error@+2 {{'svstnt1h_scatter_u32base_u32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u32base_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter, _u32base, , _u32)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svstnt1h_scatter_u32offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u32offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter_, u32, offset, _u32)(pg, u16_ptr, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svstnt1h_scatter_u32base_offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u32base_offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter, _u32base, _offset, _u32)(pg, svundef_u32(), i64, svundef_u32()); - // expected-error@+2 {{'svstnt1h_scatter_u32base_index_u32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u32base_index_u32' needs target feature sve,sve2}} + // overload-error@+1 
{{'svstnt1h_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter, _u32base, _index, _u32)(pg, svundef_u32(), i64, svundef_u32()); - // expected-error@+2 {{'svstnt1b_scatter_u32base_u32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_u32base_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter, _u32base, , _u32)(pg, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svstnt1b_scatter_u32offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_u32offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter_, u32, offset, _u32)(pg, u8_ptr, svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svstnt1b_scatter_u32base_offset_u32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_u32base_offset_u32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter, _u32base, _offset, _u32)(pg, svundef_u32(), i64, svundef_u32()); - // expected-error@+2 {{'svbsl2n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svbsl2n_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_n_u32' needs target 
feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svaddlt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlt,_u32,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svaddlt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlt,_n_u32,,)(svundef_u16(), u16); - // expected-error@+2 {{'svabalb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalb,_u32,,)(svundef_u32(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svabalb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalb,_n_u32,,)(svundef_u32(), svundef_u16(), u16); - // expected-error@+2 {{'svsublb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublb,_u32,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svsublb_n_u32' needs target feature 
sve2|sme}} - // overload-error@+1 {{'svsublb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublb,_n_u32,,)(svundef_u16(), u16); - // expected-error@+2 {{'svsbclb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsbclb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsbclb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsbclb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsbclb,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svsbclb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svsbclb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsbclb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsbclb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsbclb,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svbsl1n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svbsl1n_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_n_u32,,)(svundef_u32(), svundef_u32(), u32); - // expected-error@+2 {{'svrshl_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target 
feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_u32,_z,)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svrshl_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_u32,_m,)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svrshl_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_u32,_x,)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svrshl_n_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_u32,_z,)(pg, svundef_u32(), i32); - // expected-error@+2 {{'svrshl_n_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_u32,_m,)(pg, svundef_u32(), i32); - // expected-error@+2 {{'svrshl_n_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_u32,_x,)(pg, svundef_u32(), i32); - // expected-error@+2 {{'svrsqrte_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsqrte_z' needs target 
feature sve2|sme}} + // expected-error@+2 {{'svrsqrte_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsqrte_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsqrte,_u32,_z,)(pg, svundef_u32()); - // expected-error@+2 {{'svrsqrte_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsqrte_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsqrte_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsqrte_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsqrte,_u32,_m,)(svundef_u32(), pg, svundef_u32()); - // expected-error@+2 {{'svrsqrte_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsqrte_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsqrte_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsqrte_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsqrte,_u32,_x,)(pg, svundef_u32()); - // expected-error@+2 {{'svaddwt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwt,_u32,,)(svundef_u32(), svundef_u16()); - // expected-error@+2 {{'svaddwt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwt,_n_u32,,)(svundef_u32(), u16); - // expected-error@+2 {{'svmlslb_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb,_u32,,)(svundef_u32(), svundef_u16(), 
svundef_u16()); - // expected-error@+2 {{'svmlslb_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb,_n_u32,,)(svundef_u32(), svundef_u16(), u16); - // expected-error@+2 {{'svmlslt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslt,_u32,,)(svundef_u32(), svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svmlslt_n_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_n_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslt,_n_u32,,)(svundef_u32(), svundef_u16(), u16); - // expected-error@+2 {{'svmovlt_u32' needs target feature sve2|sme}} - // overload-error@+1 {{'svmovlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmovlt_u32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmovlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmovlt,_u32,,)(svundef_u16()); - // expected-error@+2 {{'svqshl_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_u32,_z,)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svqshl_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_u32_m' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_u32,_m,)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svqshl_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_u32,_x,)(pg, svundef_u32(), svundef_s32()); - // expected-error@+2 {{'svqshl_n_u32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_u32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_u32,_z,)(pg, svundef_u32(), i32); - // expected-error@+2 {{'svqshl_n_u32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_u32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_u32,_m,)(pg, svundef_u32(), i32); - // expected-error@+2 {{'svqshl_n_u32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_u32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_u32,_x,)(pg, svundef_u32(), i32); - // expected-error@+2 {{'svmullb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb,_u64,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svmullb_n_u64' needs target feature sve2|sme}} 
- // overload-error@+1 {{'svmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullb,_n_u64,,)(svundef_u32(), u32); - // expected-error@+2 {{'svpmullb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullb,_u64,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svpmullb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullb' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullb,_n_u64,,)(svundef_u32(), u32); - // expected-error@+2 {{'svaddwb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwb,_u64,,)(svundef_u64(), svundef_u32()); - // expected-error@+2 {{'svaddwb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwb,_n_u64,,)(svundef_u64(), u32); - // expected-error@+2 {{'svsubhnb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnb' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svsubhnb,_u64,,)(svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svsubhnb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnb,_n_u64,,)(svundef_u64(), u64); - // expected-error@+2 {{'svrsubhnt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnt,_u64,,)(svundef_u32(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svrsubhnt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnt,_n_u64,,)(svundef_u32(), svundef_u64(), u64); - // expected-error@+2 {{'svnbsl_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svnbsl_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svnbsl_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svnbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svnbsl,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svsubhnt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnt' needs target feature 
sve2|sme}} + // expected-error@+2 {{'svsubhnt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnt,_u64,,)(svundef_u32(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svsubhnt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubhnt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubhnt,_n_u64,,)(svundef_u32(), svundef_u64(), u64); - // expected-error@+2 {{'svwhilegt_b8_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b8' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b8_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b8' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b8,_u64,,)(u64, u64); - // expected-error@+2 {{'svwhilegt_b16_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b16' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b16_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b16' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b16,_u64,,)(u64, u64); - // expected-error@+2 {{'svwhilegt_b32_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b32' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b32_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b32' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilegt_b32,_u64,,)(u64, u64); - // expected-error@+2 {{'svwhilegt_b64_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilegt_b64' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilegt_b64_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilegt_b64' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svwhilegt_b64,_u64,,)(u64, u64); - // expected-error@+2 {{'svtbl2_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbl2_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbl2,_u64,,)(svundef2_u64(), svundef_u64()); - // expected-error@+2 {{'svhsubr_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_u64,_z,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svhsubr_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_u64,_m,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svhsubr_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_u64,_x,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svhsubr_n_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_u64,_z,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svhsubr_n_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svhsubr_n_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_u64,_m,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svhsubr_n_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsubr_n_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsubr,_n_u64,_x,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svhistcnt_u64_z' needs target feature sve2}} - // overload-error@+1 {{'svhistcnt_z' needs target feature sve2}} + // expected-error@+2 {{'svhistcnt_u64_z' needs target feature sve,sve2}} + // overload-error@+1 {{'svhistcnt_z' needs target feature sve,sve2}} SVE_ACLE_FUNC(svhistcnt,_u64,_z,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'sveortb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'sveortb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'sveortb' needs target feature sve2|sme}} + // expected-error@+2 {{'sveortb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveortb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveortb,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svqxtnb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtnb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtnb,_u64,,)(svundef_u64()); - // 
expected-error@+2 {{'svmlalt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt,_u64,,)(svundef_u64(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svmlalt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalt,_n_u64,,)(svundef_u64(), svundef_u32(), u32); - // expected-error@+2 {{'svaddhnt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnt,_u64,,)(svundef_u32(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svaddhnt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnt,_n_u64,,)(svundef_u32(), svundef_u64(), u64); - // expected-error@+2 {{'svldnt1uh_gather_u64base_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u64base_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _u64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1uh_gather_s64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_offset_u64' needs 
target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_s64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather_, s64, offset_u64, )(pg, const_u16_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1uh_gather_u64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather_, u64, offset_u64, )(pg, const_u16_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1uh_gather_u64base_offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u64base_offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _offset_u64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svldnt1uh_gather_s64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_s64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather_, s64, index_u64, )(pg, const_u16_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1uh_gather_u64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather_, u64, index_u64, )(pg, const_u16_ptr, 
svundef_u64()); - // expected-error@+2 {{'svldnt1uh_gather_u64base_index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uh_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uh_gather_u64base_index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uh_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _index_u64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svbcax_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svbcax_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbcax' needs target feature sve2|sme}} + // expected-error@+2 {{'svbcax_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbcax' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbcax,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svqxtnt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svqxtnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svqxtnt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqxtnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqxtnt,_u64,,)(svundef_u32(), svundef_u64()); - // expected-error@+2 {{'svqrshl_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_u64,_z,)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svqrshl_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target 
feature sve2|sme}} + // expected-error@+2 {{'svqrshl_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_u64,_m,)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svqrshl_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_u64,_x,)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svqrshl_n_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_u64,_z,)(pg, svundef_u64(), i64); - // expected-error@+2 {{'svqrshl_n_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_u64,_m,)(pg, svundef_u64(), i64); - // expected-error@+2 {{'svqrshl_n_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqrshl_n_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqrshl,_n_u64,_x,)(pg, svundef_u64(), i64); - // expected-error@+2 {{'svsublt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublt' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svsublt,_u64,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svsublt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublt,_n_u64,,)(svundef_u32(), u32); - // expected-error@+2 {{'svadalp_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_u64,_z,)(pg, svundef_u64(), svundef_u32()); - // expected-error@+2 {{'svadalp_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_u64,_m,)(pg, svundef_u64(), svundef_u32()); - // expected-error@+2 {{'svadalp_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svadalp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svadalp_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadalp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadalp,_u64,_x,)(pg, svundef_u64(), svundef_u32()); - // expected-error@+2 {{'svwhilege_b8_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b8' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b8_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b8' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b8,_u64,,)(u64, u64); - // expected-error@+2 {{'svwhilege_b16_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b16' needs target feature sve2|sme}} 
+ // expected-error@+2 {{'svwhilege_b16_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b16' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b16,_u64,,)(u64, u64); - // expected-error@+2 {{'svwhilege_b32_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b32' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b32_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b32' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b32,_u64,,)(u64, u64); - // expected-error@+2 {{'svwhilege_b64_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilege_b64' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilege_b64_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilege_b64' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilege_b64,_u64,,)(u64, u64); - // expected-error@+2 {{'svpmullt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullt,_u64,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svpmullt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svpmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svpmullt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svpmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svpmullt,_n_u64,,)(svundef_u32(), u32); - // expected-error@+2 {{'svsubwt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwt,_u64,,)(svundef_u64(), svundef_u32()); - // 
expected-error@+2 {{'svsubwt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwt,_n_u64,,)(svundef_u64(), u32); - // expected-error@+2 {{'svqsubr_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_u64,_z,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svqsubr_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_u64,_m,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svqsubr_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_u64,_x,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svqsubr_n_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_u64,_z,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svqsubr_n_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_u64_m' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_u64,_m,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svqsubr_n_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsubr_n_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsubr_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsubr,_n_u64,_x,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svadclt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svadclt' needs target feature sve2|sme}} + // expected-error@+2 {{'svadclt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadclt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadclt,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svadclt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svadclt' needs target feature sve2|sme}} + // expected-error@+2 {{'svadclt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadclt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadclt,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svaddp_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_u64,_m,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svaddp_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_u64,_x,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svqadd_u64_m' needs 
target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_u64,_m,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svqadd_n_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_u64,_m,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svqadd_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_u64,_z,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svqadd_n_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_u64,_z,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svqadd_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_u64,_x,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svqadd_n_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqadd_n_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqadd_x' needs 
target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqadd,_n_u64,_x,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svabdlb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlb,_u64,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svabdlb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlb,_n_u64,,)(svundef_u32(), u32); - // expected-error@+2 {{'svtbx_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbx' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbx_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbx' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbx,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svabdlt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlt,_u64,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svabdlt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabdlt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabdlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabdlt,_n_u64,,)(svundef_u32(), u32); - // expected-error@+2 {{'svminp_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}} + // expected-error@+2 
{{'svminp_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_u64,_m,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svminp_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_u64,_x,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svsqadd_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_u64,_m,)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svsqadd_n_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_n_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_n_u64,_m,)(pg, svundef_u64(), i64); - // expected-error@+2 {{'svsqadd_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_u64,_z,)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svsqadd_n_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_n_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_n_u64,_z,)(pg, svundef_u64(), i64); - 
// expected-error@+2 {{'svsqadd_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_u64,_x,)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svsqadd_n_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svsqadd_n_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsqadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsqadd,_n_u64,_x,)(pg, svundef_u64(), i64); - // expected-error@+2 {{'svqsub_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_u64,_z,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svqsub_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_u64,_m,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svqsub_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_u64,_x,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svqsub_n_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_u64_z' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_u64,_z,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svqsub_n_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_u64,_m,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svqsub_n_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqsub_n_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqsub,_n_u64,_x,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svrsubhnb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnb,_u64,,)(svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svrsubhnb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svrsubhnb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrsubhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrsubhnb,_n_u64,,)(svundef_u64(), u64); - // expected-error@+2 {{'svaddhnb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnb,_u64,,)(svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svaddhnb_n_u64' needs target feature sve2|sme}} 
- // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddhnb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddhnb,_n_u64,,)(svundef_u64(), u64); - // expected-error@+2 {{'svabalt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalt,_u64,,)(svundef_u64(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svabalt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalt' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalt,_n_u64,,)(svundef_u64(), svundef_u32(), u32); - // expected-error@+2 {{'sveor3_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'sveor3_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'sveor3' needs target feature sve2|sme}} + // expected-error@+2 {{'sveor3_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveor3' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveor3,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svhadd_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature 
(sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_u64,_m,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svhadd_n_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_u64,_m,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svhadd_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_u64,_z,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svhadd_n_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_u64,_z,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svhadd_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_u64,_x,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svhadd_n_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhadd_n_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhadd,_n_u64,_x,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svmovlb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmovlb' needs target feature sve2|sme}} 
+ // expected-error@+2 {{'svmovlb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmovlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmovlb,_u64,,)(svundef_u32()); - // expected-error@+2 {{'svstnt1_scatter_u64base_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64base_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u64base, , _u64)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svstnt1_scatter_s64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_s64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, s64, offset, _u64)(pg, u64_ptr, svundef_s64(), svundef_u64()); - // expected-error@+2 {{'svstnt1_scatter_u64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, u64, offset, _u64)(pg, u64_ptr, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svstnt1_scatter_u64base_offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64base_offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _offset, _u64)(pg, svundef_u64(), i64, svundef_u64()); - // expected-error@+2 {{'svstnt1_scatter_s64index_u64' needs target feature sve2}} - // 
overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_s64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, s64, index, _u64)(pg, u64_ptr, svundef_s64(), svundef_u64()); - // expected-error@+2 {{'svstnt1_scatter_u64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, u64, index, _u64)(pg, u64_ptr, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svstnt1_scatter_u64base_index_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64base_index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _index, _u64)(pg, svundef_u64(), i64, svundef_u64()); - // expected-error@+2 {{'svmaxp_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_u64,_m,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svmaxp_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_u64,_x,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svsbclt_u64' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svsbclt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsbclt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsbclt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsbclt,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svsbclt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsbclt' needs target feature sve2|sme}} + // expected-error@+2 {{'svsbclt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsbclt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsbclt,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svmullt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt,_u64,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svmullt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmullt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmullt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmullt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmullt,_n_u64,,)(svundef_u32(), u32); - // expected-error@+2 {{'svldnt1sh_gather_u64base_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u64base_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _u64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1sh_gather_s64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_s64offset_u64' needs target feature sve,sve2}} 
+ // overload-error@+1 {{'svldnt1sh_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather_, s64, offset_u64, )(pg, const_i16_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1sh_gather_u64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather_, u64, offset_u64, )(pg, const_i16_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1sh_gather_u64base_offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u64base_offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _offset_u64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svldnt1sh_gather_s64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_s64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather_, s64, index_u64, )(pg, const_i16_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1sh_gather_u64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sh_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather_, u64, index_u64, )(pg, const_i16_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1sh_gather_u64base_index_u64' needs target feature sve2}} - // 
overload-error@+1 {{'svldnt1sh_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sh_gather_u64base_index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sh_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _index_u64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svwhilerw_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilerw_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilerw,_u64,,)(const_u64_ptr, const_u64_ptr); - // expected-error@+2 {{'svrhadd_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_u64,_m,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svrhadd_n_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_u64,_m,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svrhadd_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_u64,_z,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svrhadd_n_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_u64_z' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_u64,_z,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svrhadd_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_u64,_x,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svrhadd_n_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrhadd_n_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrhadd_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrhadd,_n_u64,_x,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svraddhnb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnb,_u64,,)(svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svraddhnb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnb,_n_u64,,)(svundef_u64(), u64); - // expected-error@+2 {{'svwhilewr_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilewr_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilewr,_u64,,)(const_u64_ptr, const_u64_ptr); - // expected-error@+2 {{'svmlalb_u64' needs target 
feature sve2|sme}} - // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb,_u64,,)(svundef_u64(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svmlalb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlalb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlalb,_n_u64,,)(svundef_u64(), svundef_u32(), u32); - // expected-error@+2 {{'svldnt1sb_gather_u64base_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sb_gather_u64base_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather, _u64base, _u64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1sb_gather_s64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sb_gather_s64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather_, s64, offset_u64, )(pg, const_i8_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1sb_gather_u64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sb_gather_u64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather_, u64, offset_u64, )(pg, const_i8_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1sb_gather_u64base_offset_u64' 
needs target feature sve2}} - // overload-error@+1 {{'svldnt1sb_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sb_gather_u64base_offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sb_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sb_gather, _u64base, _offset_u64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svsubwb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwb,_u64,,)(svundef_u64(), svundef_u32()); - // expected-error@+2 {{'svsubwb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsubwb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsubwb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsubwb,_n_u64,,)(svundef_u64(), u32); - // expected-error@+2 {{'svldnt1ub_gather_u64base_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1ub_gather_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_u64base_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather, _u64base, _u64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1ub_gather_s64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1ub_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_s64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather_, s64, offset_u64, )(pg, const_u8_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1ub_gather_u64offset_u64' needs target feature sve2}} - // 
overload-error@+1 {{'svldnt1ub_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_u64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather_, u64, offset_u64, )(pg, const_u8_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1ub_gather_u64base_offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1ub_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1ub_gather_u64base_offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1ub_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1ub_gather, _u64base, _offset_u64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svaba_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svaba_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaba' needs target feature sve2|sme}} + // expected-error@+2 {{'svaba_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaba' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaba,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svraddhnt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}} + // expected-error@+2 {{'svraddhnt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnt,_u64,,)(svundef_u32(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svraddhnt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svraddhnt' needs target feature 
sve2|sme}} + // expected-error@+2 {{'svraddhnt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svraddhnt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svraddhnt,_n_u64,,)(svundef_u32(), svundef_u64(), u64); - // expected-error@+2 {{'sveorbt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'sveorbt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}} + // expected-error@+2 {{'sveorbt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'sveorbt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(sveorbt,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svldnt1sw_gather_u64base_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_u64base_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _u64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1sw_gather_s64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_s64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather_, s64, offset_u64, )(pg, const_i32_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1sw_gather_u64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 
{{'svldnt1sw_gather_u64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather_, u64, offset_u64, )(pg, const_i32_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1sw_gather_u64base_offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_u64base_offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _offset_u64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svldnt1sw_gather_s64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_s64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather_, s64, index_u64, )(pg, const_i32_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1sw_gather_u64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_u64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather_, u64, index_u64, )(pg, const_i32_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1sw_gather_u64base_index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1sw_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1sw_gather_u64base_index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1sw_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _index_u64, )(pg, svundef_u64(), i64); - // expected-error@+2 
{{'svbsl_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svbsl_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svadclb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svadclb' needs target feature sve2|sme}} + // expected-error@+2 {{'svadclb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadclb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadclb,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svadclb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svadclb' needs target feature sve2|sme}} + // expected-error@+2 {{'svadclb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svadclb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svadclb,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svhsub_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_u64,_z,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svhsub_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_u64_m' needs target feature (sve,sve2)|sme}} + // 
overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_u64,_m,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svhsub_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_u64,_x,)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svhsub_n_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_u64,_z,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svhsub_n_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_u64,_m,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svhsub_n_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svhsub_n_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svhsub_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svhsub,_n_u64,_x,)(pg, svundef_u64(), u64); - // expected-error@+2 {{'svldnt1_gather_u64base_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64base_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u64base, _u64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1_gather_s64offset_u64' 
needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_s64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, s64, offset, _u64)(pg, const_u64_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1_gather_u64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, u64, offset, _u64)(pg, const_u64_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1_gather_u64base_offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64base_offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u64base, _offset_u64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svldnt1_gather_s64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_index' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_s64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, s64, index, _u64)(pg, const_u64_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1_gather_u64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_index' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, u64, index, _u64)(pg, 
const_u64_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1_gather_u64base_index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64base_index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u64base, _index_u64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svaddlb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlb,_u64,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svaddlb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlb,_n_u64,,)(svundef_u32(), u32); - // expected-error@+2 {{'svldnt1uw_gather_u64base_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_u64base_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather, _u64base, _u64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1uw_gather_s64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_s64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather_, s64, offset_u64, )(pg, const_u32_ptr, svundef_s64()); - // 
expected-error@+2 {{'svldnt1uw_gather_u64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_u64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather_, u64, offset_u64, )(pg, const_u32_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1uw_gather_u64base_offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_offset_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_u64base_offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_offset_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather, _u64base, _offset_u64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svldnt1uw_gather_s64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_s64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather_, s64, index_u64, )(pg, const_u32_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1uw_gather_u64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_u64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1uw_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather_, u64, index_u64, )(pg, const_u32_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1uw_gather_u64base_index_u64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1uw_gather_index_u64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1uw_gather_u64base_index_u64' needs target feature sve,sve2}} + // 
overload-error@+1 {{'svldnt1uw_gather_index_u64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1uw_gather, _u64base, _index_u64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svstnt1h_scatter_u64base_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u64base_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter, _u64base, , _u64)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svstnt1h_scatter_s64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_s64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter_, s64, offset, _u64)(pg, u16_ptr, svundef_s64(), svundef_u64()); - // expected-error@+2 {{'svstnt1h_scatter_u64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter_, u64, offset, _u64)(pg, u16_ptr, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svstnt1h_scatter_u64base_offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u64base_offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter, _u64base, _offset, _u64)(pg, svundef_u64(), i64, svundef_u64()); - // expected-error@+2 {{'svstnt1h_scatter_s64index_u64' needs target feature sve2}} - // overload-error@+1 
{{'svstnt1h_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_s64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter_, s64, index, _u64)(pg, u16_ptr, svundef_s64(), svundef_u64()); - // expected-error@+2 {{'svstnt1h_scatter_u64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter_, u64, index, _u64)(pg, u16_ptr, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svstnt1h_scatter_u64base_index_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1h_scatter_u64base_index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1h_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1h_scatter, _u64base, _index, _u64)(pg, svundef_u64(), i64, svundef_u64()); - // expected-error@+2 {{'svstnt1b_scatter_u64base_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_u64base_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter, _u64base, , _u64)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svstnt1b_scatter_s64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_s64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter_, s64, offset, _u64)(pg, u8_ptr, 
svundef_s64(), svundef_u64()); - // expected-error@+2 {{'svstnt1b_scatter_u64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_u64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter_, u64, offset, _u64)(pg, u8_ptr, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svstnt1b_scatter_u64base_offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1b_scatter_u64base_offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1b_scatter, _u64base, _offset, _u64)(pg, svundef_u64(), i64, svundef_u64()); - // expected-error@+2 {{'svbsl2n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svbsl2n_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl2n_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl2n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl2n,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svaddlt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlt,_u64,,)(svundef_u32(), svundef_u32()); - // 
expected-error@+2 {{'svaddlt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddlt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddlt,_n_u64,,)(svundef_u32(), u32); - // expected-error@+2 {{'svstnt1w_scatter_u64base_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_u64base_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, , _u64)(pg, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svstnt1w_scatter_s64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_s64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter_, s64, offset, _u64)(pg, u32_ptr, svundef_s64(), svundef_u64()); - // expected-error@+2 {{'svstnt1w_scatter_u64offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_u64offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter_, u64, offset, _u64)(pg, u32_ptr, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svstnt1w_scatter_u64base_offset_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_u64base_offset_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter, 
_u64base, _offset, _u64)(pg, svundef_u64(), i64, svundef_u64()); - // expected-error@+2 {{'svstnt1w_scatter_s64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_s64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter_, s64, index, _u64)(pg, u32_ptr, svundef_s64(), svundef_u64()); - // expected-error@+2 {{'svstnt1w_scatter_u64index_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_u64index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter_, u64, index, _u64)(pg, u32_ptr, svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svstnt1w_scatter_u64base_index_u64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1w_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1w_scatter_u64base_index_u64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1w_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, _index, _u64)(pg, svundef_u64(), i64, svundef_u64()); - // expected-error@+2 {{'svabalb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalb,_u64,,)(svundef_u64(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svabalb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svabalb' needs target feature sve2|sme}} + // expected-error@+2 {{'svabalb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svabalb' needs target 
feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svabalb,_n_u64,,)(svundef_u64(), svundef_u32(), u32); - // expected-error@+2 {{'svsublb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublb,_u64,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svsublb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsublb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsublb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsublb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsublb,_n_u64,,)(svundef_u32(), u32); - // expected-error@+2 {{'svsbclb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsbclb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsbclb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsbclb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsbclb,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svsbclb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svsbclb' needs target feature sve2|sme}} + // expected-error@+2 {{'svsbclb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svsbclb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svsbclb,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svbsl1n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svbsl1n_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svbsl1n' needs target 
feature sve2|sme}} + // expected-error@+2 {{'svbsl1n_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svbsl1n' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svbsl1n,_n_u64,,)(svundef_u64(), svundef_u64(), u64); - // expected-error@+2 {{'svrshl_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_u64,_z,)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svrshl_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_u64,_m,)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svrshl_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_u64,_x,)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svrshl_n_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_u64,_z,)(pg, svundef_u64(), i64); - // expected-error@+2 {{'svrshl_n_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_u64,_m,)(pg, 
svundef_u64(), i64); - // expected-error@+2 {{'svrshl_n_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svrshl_n_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svrshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svrshl,_n_u64,_x,)(pg, svundef_u64(), i64); - // expected-error@+2 {{'svaddwt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwt,_u64,,)(svundef_u64(), svundef_u32()); - // expected-error@+2 {{'svaddwt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddwt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddwt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddwt,_n_u64,,)(svundef_u64(), u32); - // expected-error@+2 {{'svmlslb_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb,_u64,,)(svundef_u64(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svmlslb_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslb_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslb' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslb,_n_u64,,)(svundef_u64(), svundef_u32(), u32); - // expected-error@+2 {{'svmlslt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_u64' needs target feature 
(sve,sve2)|sme}} + // overload-error@+1 {{'svmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslt,_u64,,)(svundef_u64(), svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svmlslt_n_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmlslt_n_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmlslt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmlslt,_n_u64,,)(svundef_u64(), svundef_u32(), u32); - // expected-error@+2 {{'svmovlt_u64' needs target feature sve2|sme}} - // overload-error@+1 {{'svmovlt' needs target feature sve2|sme}} + // expected-error@+2 {{'svmovlt_u64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmovlt' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmovlt,_u64,,)(svundef_u32()); - // expected-error@+2 {{'svqshl_u64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_u64,_z,)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svqshl_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_u64,_m,)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svqshl_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_u64,_x,)(pg, svundef_u64(), svundef_s64()); - // expected-error@+2 {{'svqshl_n_u64_z' needs target feature 
sve2|sme}} - // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_u64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_u64,_z,)(pg, svundef_u64(), i64); - // expected-error@+2 {{'svqshl_n_u64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_u64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_u64,_m,)(pg, svundef_u64(), i64); - // expected-error@+2 {{'svqshl_n_u64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svqshl_n_u64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svqshl_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svqshl,_n_u64,_x,)(pg, svundef_u64(), i64); - // expected-error@+2 {{'svlogb_f16_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svlogb_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svlogb_f16_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svlogb_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svlogb,_f16,_z,)(pg, svundef_f16()); - // expected-error@+2 {{'svlogb_f16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svlogb_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svlogb_f16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svlogb_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svlogb,_f16,_m,)(svundef_s16(), pg, svundef_f16()); - // expected-error@+2 {{'svlogb_f16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svlogb_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svlogb_f16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svlogb_x' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svlogb,_f16,_x,)(pg, svundef_f16()); - // expected-error@+2 {{'svminnmp_f16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svminnmp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminnmp_f16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminnmp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminnmp,_f16,_m,)(pg, svundef_f16(), svundef_f16()); - // expected-error@+2 {{'svminnmp_f16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminnmp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminnmp_f16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminnmp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminnmp,_f16,_x,)(pg, svundef_f16(), svundef_f16()); - // expected-error@+2 {{'svtbl2_f16' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbl2_f16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbl2,_f16,,)(svundef2_f16(), svundef_u16()); - // expected-error@+2 {{'svaddp_f16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_f16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_f16,_m,)(pg, svundef_f16(), svundef_f16()); - // expected-error@+2 {{'svaddp_f16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_f16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_f16,_x,)(pg, svundef_f16(), svundef_f16()); - // expected-error@+2 {{'svtbx_f16' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbx' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svtbx_f16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbx' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbx,_f16,,)(svundef_f16(), svundef_f16(), svundef_u16()); - // expected-error@+2 {{'svminp_f16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_f16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_f16,_m,)(pg, svundef_f16(), svundef_f16()); - // expected-error@+2 {{'svminp_f16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_f16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_f16,_x,)(pg, svundef_f16(), svundef_f16()); - // expected-error@+2 {{'svmaxp_f16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_f16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_f16,_m,)(pg, svundef_f16(), svundef_f16()); - // expected-error@+2 {{'svmaxp_f16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_f16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_f16,_x,)(pg, svundef_f16(), svundef_f16()); - // expected-error@+2 {{'svmaxnmp_f16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxnmp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxnmp_f16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxnmp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxnmp,_f16,_m,)(pg, svundef_f16(), 
svundef_f16()); - // expected-error@+2 {{'svmaxnmp_f16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxnmp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxnmp_f16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxnmp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxnmp,_f16,_x,)(pg, svundef_f16(), svundef_f16()); - // expected-error@+2 {{'svwhilerw_f16' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilerw_f16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilerw,_f16,,)(const_f16_ptr, const_f16_ptr); - // expected-error@+2 {{'svwhilewr_f16' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilewr_f16' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilewr,_f16,,)(const_f16_ptr, const_f16_ptr); - // expected-error@+2 {{'svcvtlt_f32_f16_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtlt_f32_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtlt_f32_f16_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtlt_f32_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtlt_f32,_f16,_m,)(svundef_f32(), pg, svundef_f16()); - // expected-error@+2 {{'svcvtlt_f32_f16_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtlt_f32_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtlt_f32_f16_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtlt_f32_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtlt_f32,_f16,_x,)(pg, svundef_f16()); - // expected-error@+2 {{'svlogb_f32_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svlogb_z' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svlogb_f32_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svlogb_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svlogb,_f32,_z,)(pg, svundef_f32()); - // expected-error@+2 {{'svlogb_f32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svlogb_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svlogb_f32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svlogb_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svlogb,_f32,_m,)(svundef_s32(), pg, svundef_f32()); - // expected-error@+2 {{'svlogb_f32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svlogb_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svlogb_f32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svlogb_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svlogb,_f32,_x,)(pg, svundef_f32()); - // expected-error@+2 {{'svminnmp_f32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svminnmp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminnmp_f32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminnmp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminnmp,_f32,_m,)(pg, svundef_f32(), svundef_f32()); - // expected-error@+2 {{'svminnmp_f32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminnmp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminnmp_f32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminnmp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminnmp,_f32,_x,)(pg, svundef_f32(), svundef_f32()); - // expected-error@+2 {{'svtbl2_f32' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbl2_f32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbl2,_f32,,)(svundef2_f32(), svundef_u32()); - // 
expected-error@+2 {{'svaddp_f32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_f32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_f32,_m,)(pg, svundef_f32(), svundef_f32()); - // expected-error@+2 {{'svaddp_f32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_f32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_f32,_x,)(pg, svundef_f32(), svundef_f32()); - // expected-error@+2 {{'svtbx_f32' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbx' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbx_f32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbx' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbx,_f32,,)(svundef_f32(), svundef_f32(), svundef_u32()); - // expected-error@+2 {{'svminp_f32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_f32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_f32,_m,)(pg, svundef_f32(), svundef_f32()); - // expected-error@+2 {{'svminp_f32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_f32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_f32,_x,)(pg, svundef_f32(), svundef_f32()); - // expected-error@+2 {{'svstnt1_scatter_u32base_f32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u32base_f32' needs 
target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u32base, , _f32)(pg, svundef_u32(), svundef_f32()); - // expected-error@+2 {{'svstnt1_scatter_u32offset_f32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u32offset_f32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, u32, offset, _f32)(pg, f32_ptr, svundef_u32(), svundef_f32()); - // expected-error@+2 {{'svstnt1_scatter_u32base_offset_f32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u32base_offset_f32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _offset, _f32)(pg, svundef_u32(), i64, svundef_f32()); - // expected-error@+2 {{'svstnt1_scatter_u32base_index_f32' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u32base_index_f32' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _index, _f32)(pg, svundef_u32(), i64, svundef_f32()); - // expected-error@+2 {{'svmaxp_f32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_f32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_f32,_m,)(pg, svundef_f32(), svundef_f32()); - // expected-error@+2 {{'svmaxp_f32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}} + // 
expected-error@+2 {{'svmaxp_f32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_f32,_x,)(pg, svundef_f32(), svundef_f32()); - // expected-error@+2 {{'svmaxnmp_f32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxnmp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxnmp_f32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxnmp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxnmp,_f32,_m,)(pg, svundef_f32(), svundef_f32()); - // expected-error@+2 {{'svmaxnmp_f32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxnmp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxnmp_f32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxnmp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxnmp,_f32,_x,)(pg, svundef_f32(), svundef_f32()); - // expected-error@+2 {{'svwhilerw_f32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilerw_f32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilerw,_f32,,)(const_f32_ptr, const_f32_ptr); - // expected-error@+2 {{'svcvtnt_f16_f32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtnt_f16_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtnt_f16_f32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtnt_f16_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtnt_f16,_f32,_m,)(svundef_f16(), pg, svundef_f32()); - // expected-error@+2 {{'svcvtnt_f16_f32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtnt_f16_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtnt_f16_f32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtnt_f16_m' needs target feature 
(sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtnt_f16,_f32,_x,)(svundef_f16(), pg, svundef_f32()); - // expected-error@+2 {{'svwhilewr_f32' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilewr_f32' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilewr,_f32,,)(const_f32_ptr, const_f32_ptr); - // expected-error@+2 {{'svcvtlt_f64_f32_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtlt_f64_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtlt_f64_f32_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtlt_f64_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtlt_f64,_f32,_m,)(svundef_f64(), pg, svundef_f32()); - // expected-error@+2 {{'svcvtlt_f64_f32_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtlt_f64_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtlt_f64_f32_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtlt_f64_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtlt_f64,_f32,_x,)(pg, svundef_f32()); - // expected-error@+2 {{'svldnt1_gather_u32base_f32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_f32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u32base_f32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_f32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u32base, _f32, )(pg, svundef_u32()); - // expected-error@+2 {{'svldnt1_gather_u32offset_f32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u32offset_f32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, u32, offset, _f32)(pg, const_f32_ptr, 
svundef_u32()); - // expected-error@+2 {{'svldnt1_gather_u32base_offset_f32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset_f32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u32base_offset_f32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset_f32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u32base, _offset_f32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svldnt1_gather_u32base_index_f32' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_index_f32' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u32base_index_f32' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_index_f32' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u32base, _index_f32, )(pg, svundef_u32(), i64); - // expected-error@+2 {{'svlogb_f64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svlogb_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svlogb_f64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svlogb_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svlogb,_f64,_z,)(pg, svundef_f64()); - // expected-error@+2 {{'svlogb_f64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svlogb_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svlogb_f64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svlogb_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svlogb,_f64,_m,)(svundef_s64(), pg, svundef_f64()); - // expected-error@+2 {{'svlogb_f64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svlogb_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svlogb_f64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svlogb_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svlogb,_f64,_x,)(pg, svundef_f64()); - // expected-error@+2 {{'svminnmp_f64_m' needs target feature sve2|sme}} - // 
overload-error@+1 {{'svminnmp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminnmp_f64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminnmp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminnmp,_f64,_m,)(pg, svundef_f64(), svundef_f64()); - // expected-error@+2 {{'svminnmp_f64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminnmp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminnmp_f64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminnmp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminnmp,_f64,_x,)(pg, svundef_f64(), svundef_f64()); - // expected-error@+2 {{'svtbl2_f64' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbl2_f64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svtbl2,_f64,,)(svundef2_f64(), svundef_u64()); - // expected-error@+2 {{'svaddp_f64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_f64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_f64,_m,)(pg, svundef_f64(), svundef_f64()); - // expected-error@+2 {{'svaddp_f64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svaddp_f64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svaddp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svaddp,_f64,_x,)(pg, svundef_f64(), svundef_f64()); - // expected-error@+2 {{'svtbx_f64' needs target feature sve2|sme}} - // overload-error@+1 {{'svtbx' needs target feature sve2|sme}} + // expected-error@+2 {{'svtbx_f64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svtbx' needs target feature (sve,sve2)|sme}} 
SVE_ACLE_FUNC(svtbx,_f64,,)(svundef_f64(), svundef_f64(), svundef_u64()); - // expected-error@+2 {{'svminp_f64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_f64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_f64,_m,)(pg, svundef_f64(), svundef_f64()); - // expected-error@+2 {{'svminp_f64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svminp_f64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svminp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svminp,_f64,_x,)(pg, svundef_f64(), svundef_f64()); - // expected-error@+2 {{'svstnt1_scatter_u64base_f64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64base_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u64base, , _f64)(pg, svundef_u64(), svundef_f64()); - // expected-error@+2 {{'svstnt1_scatter_s64offset_f64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_s64offset_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, s64, offset, _f64)(pg, f64_ptr, svundef_s64(), svundef_f64()); - // expected-error@+2 {{'svstnt1_scatter_u64offset_f64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64offset_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, u64, offset, 
_f64)(pg, f64_ptr, svundef_u64(), svundef_f64()); - // expected-error@+2 {{'svstnt1_scatter_u64base_offset_f64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64base_offset_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _offset, _f64)(pg, svundef_u64(), i64, svundef_f64()); - // expected-error@+2 {{'svstnt1_scatter_s64index_f64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_s64index_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, s64, index, _f64)(pg, f64_ptr, svundef_s64(), svundef_f64()); - // expected-error@+2 {{'svstnt1_scatter_u64index_f64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64index_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter_, u64, index, _f64)(pg, f64_ptr, svundef_u64(), svundef_f64()); - // expected-error@+2 {{'svstnt1_scatter_u64base_index_f64' needs target feature sve2}} - // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}} + // expected-error@+2 {{'svstnt1_scatter_u64base_index_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _index, _f64)(pg, svundef_u64(), i64, svundef_f64()); - // expected-error@+2 {{'svmaxp_f64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_f64_m' needs target feature (sve,sve2)|sme}} + // 
overload-error@+1 {{'svmaxp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_f64,_m,)(pg, svundef_f64(), svundef_f64()); - // expected-error@+2 {{'svmaxp_f64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxp_f64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxp,_f64,_x,)(pg, svundef_f64(), svundef_f64()); - // expected-error@+2 {{'svmaxnmp_f64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxnmp_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxnmp_f64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxnmp_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxnmp,_f64,_m,)(pg, svundef_f64(), svundef_f64()); - // expected-error@+2 {{'svmaxnmp_f64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svmaxnmp_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svmaxnmp_f64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svmaxnmp_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svmaxnmp,_f64,_x,)(pg, svundef_f64(), svundef_f64()); - // expected-error@+2 {{'svwhilerw_f64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilerw_f64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilerw,_f64,,)(const_f64_ptr, const_f64_ptr); - // expected-error@+2 {{'svcvtnt_f32_f64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtnt_f32_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtnt_f32_f64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtnt_f32_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtnt_f32,_f64,_m,)(svundef_f32(), pg, svundef_f64()); - // expected-error@+2 
{{'svcvtnt_f32_f64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtnt_f32_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtnt_f32_f64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtnt_f32_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtnt_f32,_f64,_x,)(svundef_f32(), pg, svundef_f64()); - // expected-error@+2 {{'svwhilewr_f64' needs target feature sve2|sme}} - // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}} + // expected-error@+2 {{'svwhilewr_f64' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svwhilewr,_f64,,)(const_f64_ptr, const_f64_ptr); - // expected-error@+2 {{'svcvtx_f32_f64_z' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtx_f32_z' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtx_f32_f64_z' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtx_f32_z' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtx_f32,_f64,_z,)(pg, svundef_f64()); - // expected-error@+2 {{'svcvtx_f32_f64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtx_f32_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtx_f32_f64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtx_f32_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtx_f32,_f64,_m,)(svundef_f32(), pg, svundef_f64()); - // expected-error@+2 {{'svcvtx_f32_f64_x' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtx_f32_x' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtx_f32_f64_x' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtx_f32_x' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtx_f32,_f64,_x,)(pg, svundef_f64()); - // expected-error@+2 {{'svldnt1_gather_u64base_f64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_f64' needs target feature sve2}} + // 
expected-error@+2 {{'svldnt1_gather_u64base_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_f64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u64base, _f64, )(pg, svundef_u64()); - // expected-error@+2 {{'svldnt1_gather_s64offset_f64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_s64offset_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, s64, offset, _f64)(pg, const_f64_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1_gather_u64offset_f64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64offset_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, u64, offset, _f64)(pg, const_f64_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1_gather_u64base_offset_f64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_offset_f64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64base_offset_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_offset_f64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u64base, _offset_f64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svldnt1_gather_s64index_f64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_index' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_s64index_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, s64, index, _f64)(pg, const_f64_ptr, svundef_s64()); - // expected-error@+2 {{'svldnt1_gather_u64index_f64' needs target feature sve2}} - // 
overload-error@+1 {{'svldnt1_gather_index' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64index_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_index' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather_, u64, index, _f64)(pg, const_f64_ptr, svundef_u64()); - // expected-error@+2 {{'svldnt1_gather_u64base_index_f64' needs target feature sve2}} - // overload-error@+1 {{'svldnt1_gather_index_f64' needs target feature sve2}} + // expected-error@+2 {{'svldnt1_gather_u64base_index_f64' needs target feature sve,sve2}} + // overload-error@+1 {{'svldnt1_gather_index_f64' needs target feature sve,sve2}} SVE_ACLE_FUNC(svldnt1_gather, _u64base, _index_f64, )(pg, svundef_u64(), i64); - // expected-error@+2 {{'svcvtxnt_f32_f64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtxnt_f32_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtxnt_f32_f64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtxnt_f32_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtxnt_f32,_f64,_m,)(svundef_f32(), pg, svundef_f64()); - // expected-error@+2 {{'svcvtxnt_f32_f64_m' needs target feature sve2|sme}} - // overload-error@+1 {{'svcvtxnt_f32_m' needs target feature sve2|sme}} + // expected-error@+2 {{'svcvtxnt_f32_f64_m' needs target feature (sve,sve2)|sme}} + // overload-error@+1 {{'svcvtxnt_f32_m' needs target feature (sve,sve2)|sme}} SVE_ACLE_FUNC(svcvtxnt_f32,_f64,_x,)(svundef_f32(), pg, svundef_f64()); } diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp index 81cbb2dce921bb2..795bb7605330342 100644 --- a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp +++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_aes_bitperm_sha3_sm4.cpp @@ -14,116 +14,116 @@ void test(uint8_t u8, uint16_t u16, uint32_t u32, uint64_t u64) { - // 
expected-error@+2 {{'svaesd_u8' needs target feature sve2-aes}} - // overload-error@+1 {{'svaesd' needs target feature sve2-aes}} + // expected-error@+2 {{'svaesd_u8' needs target feature sve,sve2-aes}} + // overload-error@+1 {{'svaesd' needs target feature sve,sve2-aes}} SVE_ACLE_FUNC(svaesd,_u8,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svaese_u8' needs target feature sve2-aes}} - // overload-error@+1 {{'svaese' needs target feature sve2-aes}} + // expected-error@+2 {{'svaese_u8' needs target feature sve,sve2-aes}} + // overload-error@+1 {{'svaese' needs target feature sve,sve2-aes}} SVE_ACLE_FUNC(svaese,_u8,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svaesimc_u8' needs target feature sve2-aes}} - // overload-error@+1 {{'svaesimc' needs target feature sve2-aes}} + // expected-error@+2 {{'svaesimc_u8' needs target feature sve,sve2-aes}} + // overload-error@+1 {{'svaesimc' needs target feature sve,sve2-aes}} SVE_ACLE_FUNC(svaesimc,_u8,,)(svundef_u8()); - // expected-error@+2 {{'svaesmc_u8' needs target feature sve2-aes}} - // overload-error@+1 {{'svaesmc' needs target feature sve2-aes}} + // expected-error@+2 {{'svaesmc_u8' needs target feature sve,sve2-aes}} + // overload-error@+1 {{'svaesmc' needs target feature sve,sve2-aes}} SVE_ACLE_FUNC(svaesmc,_u8,,)(svundef_u8()); - // expected-error@+2 {{'svbdep_u8' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbdep' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbdep_u8' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbdep' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbdep,_u8,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svbdep_n_u8' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbdep' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbdep_n_u8' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbdep' needs target feature sve,sve2-bitperm}} 
SVE_ACLE_FUNC(svbdep,_n_u8,,)(svundef_u8(), u8); - // expected-error@+2 {{'svbext_u8' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbext' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbext_u8' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbext' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbext,_u8,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svbext_n_u8' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbext' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbext_n_u8' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbext' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbext,_n_u8,,)(svundef_u8(), u8); - // expected-error@+2 {{'svbgrp_u8' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbgrp' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbgrp_u8' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbgrp' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbgrp,_u8,,)(svundef_u8(), svundef_u8()); - // expected-error@+2 {{'svbgrp_n_u8' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbgrp' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbgrp_n_u8' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbgrp' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbgrp,_n_u8,,)(svundef_u8(), u8); - // expected-error@+2 {{'svbdep_u16' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbdep' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbdep_u16' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbdep' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbdep,_u16,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svbdep_n_u16' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbdep' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbdep_n_u16' needs target 
feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbdep' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbdep,_n_u16,,)(svundef_u16(), u16); - // expected-error@+2 {{'svbext_u16' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbext' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbext_u16' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbext' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbext,_u16,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svbext_n_u16' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbext' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbext_n_u16' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbext' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbext,_n_u16,,)(svundef_u16(), u16); - // expected-error@+2 {{'svbgrp_u16' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbgrp' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbgrp_u16' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbgrp' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbgrp,_u16,,)(svundef_u16(), svundef_u16()); - // expected-error@+2 {{'svbgrp_n_u16' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbgrp' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbgrp_n_u16' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbgrp' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbgrp,_n_u16,,)(svundef_u16(), u16); - // expected-error@+2 {{'svbdep_u32' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbdep' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbdep_u32' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbdep' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbdep,_u32,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svbdep_n_u32' needs target feature 
sve2-bitperm}} - // overload-error@+1 {{'svbdep' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbdep_n_u32' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbdep' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbdep,_n_u32,,)(svundef_u32(), u32); - // expected-error@+2 {{'svbext_u32' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbext' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbext_u32' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbext' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbext,_u32,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svbext_n_u32' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbext' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbext_n_u32' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbext' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbext,_n_u32,,)(svundef_u32(), u32); - // expected-error@+2 {{'svbgrp_u32' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbgrp' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbgrp_u32' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbgrp' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbgrp,_u32,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svbgrp_n_u32' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbgrp' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbgrp_n_u32' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbgrp' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbgrp,_n_u32,,)(svundef_u32(), u32); - // expected-error@+2 {{'svsm4e_u32' needs target feature sve2-sm4}} - // overload-error@+1 {{'svsm4e' needs target feature sve2-sm4}} + // expected-error@+2 {{'svsm4e_u32' needs target feature sve,sve2-sm4}} + // overload-error@+1 {{'svsm4e' needs target feature sve,sve2-sm4}} 
SVE_ACLE_FUNC(svsm4e,_u32,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svsm4ekey_u32' needs target feature sve2-sm4}} - // overload-error@+1 {{'svsm4ekey' needs target feature sve2-sm4}} + // expected-error@+2 {{'svsm4ekey_u32' needs target feature sve,sve2-sm4}} + // overload-error@+1 {{'svsm4ekey' needs target feature sve,sve2-sm4}} SVE_ACLE_FUNC(svsm4ekey,_u32,,)(svundef_u32(), svundef_u32()); - // expected-error@+2 {{'svbdep_u64' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbdep' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbdep_u64' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbdep' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbdep,_u64,,)(svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svbdep_n_u64' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbdep' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbdep_n_u64' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbdep' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbdep,_n_u64,,)(svundef_u64(), u64); - // expected-error@+2 {{'svbext_u64' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbext' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbext_u64' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbext' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbext,_u64,,)(svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svbext_n_u64' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbext' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbext_n_u64' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbext' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbext,_n_u64,,)(svundef_u64(), u64); - // expected-error@+2 {{'svbgrp_u64' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbgrp' needs target feature sve2-bitperm}} + // expected-error@+2 
{{'svbgrp_u64' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbgrp' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbgrp,_u64,,)(svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svbgrp_n_u64' needs target feature sve2-bitperm}} - // overload-error@+1 {{'svbgrp' needs target feature sve2-bitperm}} + // expected-error@+2 {{'svbgrp_n_u64' needs target feature sve,sve2-bitperm}} + // overload-error@+1 {{'svbgrp' needs target feature sve,sve2-bitperm}} SVE_ACLE_FUNC(svbgrp,_n_u64,,)(svundef_u64(), u64); - // expected-error@+2 {{'svpmullb_pair_u64' needs target feature sve2-aes}} - // overload-error@+1 {{'svpmullb_pair' needs target feature sve2-aes}} + // expected-error@+2 {{'svpmullb_pair_u64' needs target feature sve,sve2-aes}} + // overload-error@+1 {{'svpmullb_pair' needs target feature sve,sve2-aes}} SVE_ACLE_FUNC(svpmullb_pair,_u64,,)(svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svpmullb_pair_n_u64' needs target feature sve2-aes}} - // overload-error@+1 {{'svpmullb_pair' needs target feature sve2-aes}} + // expected-error@+2 {{'svpmullb_pair_n_u64' needs target feature sve,sve2-aes}} + // overload-error@+1 {{'svpmullb_pair' needs target feature sve,sve2-aes}} SVE_ACLE_FUNC(svpmullb_pair,_n_u64,,)(svundef_u64(), u64); - // expected-error@+2 {{'svpmullt_pair_u64' needs target feature sve2-aes}} - // overload-error@+1 {{'svpmullt_pair' needs target feature sve2-aes}} + // expected-error@+2 {{'svpmullt_pair_u64' needs target feature sve,sve2-aes}} + // overload-error@+1 {{'svpmullt_pair' needs target feature sve,sve2-aes}} SVE_ACLE_FUNC(svpmullt_pair,_u64,,)(svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svpmullt_pair_n_u64' needs target feature sve2-aes}} - // overload-error@+1 {{'svpmullt_pair' needs target feature sve2-aes}} + // expected-error@+2 {{'svpmullt_pair_n_u64' needs target feature sve,sve2-aes}} + // overload-error@+1 {{'svpmullt_pair' needs target feature sve,sve2-aes}} 
SVE_ACLE_FUNC(svpmullt_pair,_n_u64,,)(svundef_u64(), u64); - // expected-error@+2 {{'svrax1_u64' needs target feature sve2-sha3}} - // overload-error@+1 {{'svrax1' needs target feature sve2-sha3}} + // expected-error@+2 {{'svrax1_u64' needs target feature sve,sve2-sha3}} + // overload-error@+1 {{'svrax1' needs target feature sve,sve2-sha3}} SVE_ACLE_FUNC(svrax1,_u64,,)(svundef_u64(), svundef_u64()); - // expected-error@+2 {{'svrax1_s64' needs target feature sve2-sha3}} - // overload-error@+1 {{'svrax1' needs target feature sve2-sha3}} + // expected-error@+2 {{'svrax1_s64' needs target feature sve,sve2-sha3}} + // overload-error@+1 {{'svrax1' needs target feature sve,sve2-sha3}} SVE_ACLE_FUNC(svrax1,_s64,,)(svundef_s64(), svundef_s64()); } diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_bfloat.cpp b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_bfloat.cpp index bb72a3eaf60eca7..4a2f8238caf0ef8 100644 --- a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_bfloat.cpp +++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_bfloat.cpp @@ -14,16 +14,16 @@ void test_bfloat(const bfloat16_t *const_bf16_ptr, svbfloat16_t bf16, svbfloat16x2_t bf16x2) { - // expected-error@+2 {{'svwhilerw_bf16' needs target feature (sve2,bf16)|(sme,bf16)}} - // overload-error@+1 {{'svwhilerw' needs target feature (sve2,bf16)|(sme,bf16)}} + // expected-error@+2 {{'svwhilerw_bf16' needs target feature (sve,sve2,bf16)|(sme,bf16)}} + // overload-error@+1 {{'svwhilerw' needs target feature (sve,sve2,bf16)|(sme,bf16)}} SVE_ACLE_FUNC(svwhilerw,_bf16,,)(const_bf16_ptr, const_bf16_ptr); - // expected-error@+2 {{'svtbx_bf16' needs target feature (sve2,bf16)|(sme,bf16)}} - // overload-error@+1 {{'svtbx' needs target feature (sve2,bf16)|(sme,bf16)}} + // expected-error@+2 {{'svtbx_bf16' needs target feature (sve,sve2,bf16)|(sme,bf16)}} + // overload-error@+1 {{'svtbx' needs target feature (sve,sve2,bf16)|(sme,bf16)}} SVE_ACLE_FUNC(svtbx,_bf16,,)(bf16, bf16, svundef_u16()); - // 
expected-error@+2 {{'svtbl2_bf16' needs target feature (sve2,bf16)|(sme,bf16)}} - // overload-error@+1 {{'svtbl2' needs target feature (sve2,bf16)|(sme,bf16)}} + // expected-error@+2 {{'svtbl2_bf16' needs target feature (sve,sve2,bf16)|(sme,bf16)}} + // overload-error@+1 {{'svtbl2' needs target feature (sve,sve2,bf16)|(sme,bf16)}} SVE_ACLE_FUNC(svtbl2,_bf16,,)(bf16x2, svundef_u16()); - // expected-error@+2 {{'svwhilewr_bf16' needs target feature (sve2,bf16)|(sme,bf16)}} - // overload-error@+1 {{'svwhilewr' needs target feature (sve2,bf16)|(sme,bf16)}} + // expected-error@+2 {{'svwhilewr_bf16' needs target feature (sve,sve2,bf16)|(sme,bf16)}} + // overload-error@+1 {{'svwhilewr' needs target feature (sve,sve2,bf16)|(sme,bf16)}} SVE_ACLE_FUNC(svwhilewr,_bf16,,)(const_bf16_ptr, const_bf16_ptr); } diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_b16b16.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_b16b16.cpp index 188054f4517140d..f191a442fefe306 100644 --- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_b16b16.cpp +++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_b16b16.cpp @@ -29,28 +29,28 @@ void test_with_sve_b16b16(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfl void test_no_sve_b16b16(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) MODE_ATTR { - // expected-error@+1 {{'svclamp_bf16' needs target feature (sve2,sve-b16b16)|(sme2,sve-b16b16)}} + // expected-error@+1 {{'svclamp_bf16' needs target feature (sve,sve2,sve-b16b16)|(sme,sme2,sve-b16b16)}} svclamp_bf16(op1, op2, op3); - // expected-error@+1 {{'svadd_bf16_m' needs target feature (sve2,sve-b16b16)|(sme2,sve-b16b16)}} + // expected-error@+1 {{'svadd_bf16_m' needs target feature (sve,sve2,sve-b16b16)|(sme,sme2,sve-b16b16)}} svadd_bf16_m(pg, op1, op2); - // expected-error@+1 {{'svmax_bf16_m' needs target feature (sve2,sve-b16b16)|(sme2,sve-b16b16)}} + // expected-error@+1 {{'svmax_bf16_m' needs target feature 
(sve,sve2,sve-b16b16)|(sme,sme2,sve-b16b16)}} svmax_bf16_m(pg, op1, op2); - // expected-error@+1 {{'svmaxnm_bf16_m' needs target feature (sve2,sve-b16b16)|(sme2,sve-b16b16)}} + // expected-error@+1 {{'svmaxnm_bf16_m' needs target feature (sve,sve2,sve-b16b16)|(sme,sme2,sve-b16b16)}} svmaxnm_bf16_m(pg, op1, op2); - // expected-error@+1 {{'svmin_bf16_m' needs target feature (sve2,sve-b16b16)|(sme2,sve-b16b16)}} + // expected-error@+1 {{'svmin_bf16_m' needs target feature (sve,sve2,sve-b16b16)|(sme,sme2,sve-b16b16)}} svmin_bf16_m(pg, op1, op2); - // expected-error@+1 {{'svminnm_bf16_m' needs target feature (sve2,sve-b16b16)|(sme2,sve-b16b16)}} + // expected-error@+1 {{'svminnm_bf16_m' needs target feature (sve,sve2,sve-b16b16)|(sme,sme2,sve-b16b16)}} svminnm_bf16_m(pg, op1, op2); - // expected-error@+1 {{'svmla_lane_bf16' needs target feature (sve2,sve-b16b16)|(sme2,sve-b16b16)}} + // expected-error@+1 {{'svmla_lane_bf16' needs target feature (sve,sve2,sve-b16b16)|(sme,sme2,sve-b16b16)}} svmla_lane_bf16(op1, op2, op3, 1); - // expected-error@+1 {{'svmla_bf16_m' needs target feature (sve2,sve-b16b16)|(sme2,sve-b16b16)}} + // expected-error@+1 {{'svmla_bf16_m' needs target feature (sve,sve2,sve-b16b16)|(sme,sme2,sve-b16b16)}} svmla_bf16_m(pg, op1, op2, op3); - // expected-error@+1 {{'svmls_bf16_m' needs target feature (sve2,sve-b16b16)|(sme2,sve-b16b16)}} + // expected-error@+1 {{'svmls_bf16_m' needs target feature (sve,sve2,sve-b16b16)|(sme,sme2,sve-b16b16)}} svmls_bf16_m(pg, op1, op2, op3); - // expected-error@+1 {{'svmul_lane_bf16' needs target feature (sve2,sve-b16b16)|(sme2,sve-b16b16)}} + // expected-error@+1 {{'svmul_lane_bf16' needs target feature (sve,sve2,sve-b16b16)|(sme,sme2,sve-b16b16)}} svmul_lane_bf16(op1, op2, 1); - // expected-error@+1 {{'svmul_bf16_m' needs target feature (sve2,sve-b16b16)|(sme2,sve-b16b16)}} + // expected-error@+1 {{'svmul_bf16_m' needs target feature (sve,sve2,sve-b16b16)|(sme,sme2,sve-b16b16)}} svmul_bf16_m(pg, op1, op2); - // 
expected-error@+1 {{'svsub_bf16_m' needs target feature (sve2,sve-b16b16)|(sme2,sve-b16b16)}} + // expected-error@+1 {{'svsub_bf16_m' needs target feature (sve,sve2,sve-b16b16)|(sme,sme2,sve-b16b16)}} svsub_bf16_m(pg, op1, op2); } diff --git a/clang/test/Sema/attr-nonblocking-constraints.cpp b/clang/test/Sema/attr-nonblocking-constraints.cpp index c694860069c9601..f23093d4dc8a96f 100644 --- a/clang/test/Sema/attr-nonblocking-constraints.cpp +++ b/clang/test/Sema/attr-nonblocking-constraints.cpp @@ -156,6 +156,17 @@ void nb10( static_cast(fp1)(); // expected-warning {{function with 'nonblocking' attribute must not call non-'nonblocking' expression}} } +// Expression involving indirection +int nb10a() [[clang::nonblocking]]; +int nb10b() [[clang::nonblocking]]; +int blocking(); + +int nb10c(bool x) [[clang::nonblocking]] +{ + int y = (x ? nb10a : blocking)(); // expected-warning {{attribute 'nonblocking' should not be added via type conversion}} + return (x ? nb10a : nb10b)(); // No diagnostic. 
+} + // Interactions with nonblocking(false) void nb11_no_inference_1() [[clang::nonblocking(false)]] // expected-note {{function does not permit inference of 'nonblocking'}} { diff --git a/clang/test/Sema/attr-target-clones-aarch64.c b/clang/test/Sema/attr-target-clones-aarch64.c index 6f0185816b0ac98..2765c06c68fbb82 100644 --- a/clang/test/Sema/attr-target-clones-aarch64.c +++ b/clang/test/Sema/attr-target-clones-aarch64.c @@ -4,8 +4,6 @@ void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3+mops", "r // expected-warning@+1 {{unsupported 'default' in the 'target_clones' attribute string; 'target_clones' attribute ignored}} void __attribute__((target_clones("default+sha3"))) warn1(void); -// expected-warning@+1 {{version list contains entries that don't impact code generation}} -void __attribute__((target_clones("ssbs"))) warn2(void); // expected-error@+2 {{'target_clones' and 'target_version' attributes are not compatible}} // expected-note@+1 {{conflicting attribute is here}} @@ -49,7 +47,6 @@ int __attribute__((target_clones("fp", "fp", "crc+dotprod", "dotprod+crc"))) dup // expected-warning@+1 {{version list contains duplicate entries}} int __attribute__((target_clones("fp16+memtag", "memtag+fp16"))) dup6(void) { return 6; } -int __attribute__((target_clones("simd+ssbs2", "simd+dpb2"))) dup7(void) { return 7; } // expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}} void __attribute__((target_clones(""))) empty_target_1(void); @@ -79,6 +76,6 @@ int useage(void) { return mv_after_use(); } // expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}} -int __attribute__((target_clones("sve2-sha3+ssbs2", "sm4"))) mv_after_use(void) { return 1; } +int __attribute__((target_clones("sve2-sha3+ssbs", "sm4"))) mv_after_use(void) { return 1; } // expected-error@+1 {{'main' cannot be a multiversioned function}} int __attribute__((target_clones("sve-i8mm"))) main() { return 1; } diff 
--git a/clang/test/Sema/caret-diags-register-variable.cpp b/clang/test/Sema/caret-diags-register-variable.cpp new file mode 100644 index 000000000000000..24f5061d4b4d2cf --- /dev/null +++ b/clang/test/Sema/caret-diags-register-variable.cpp @@ -0,0 +1,20 @@ +// RUN: not %clang_cc1 -triple i386-pc-linux-gnu -std=c++11 -fsyntax-only -fno-diagnostics-show-line-numbers -fcaret-diagnostics-max-lines=5 %s 2>&1 | FileCheck %s -strict-whitespace + +struct foo { + int a; +}; + +//CHECK: {{.*}}: error: bad type for named register variable +//CHECK-NEXT: {{^}}register struct foo bar asm("esp"); +//CHECK-NEXT: {{^}} ^~~~~~~~~~{{$}} +register struct foo bar asm("esp"); + +//CHECK: {{.*}}: error: register 'edi' unsuitable for global register variables on this target +//CHECK-NEXT: {{^}}register int r0 asm ("edi"); +//CHECK-NEXT: {{^}} ^{{$}} +register int r0 asm ("edi"); + +//CHECK: {{.*}}: error: size of register 'esp' does not match variable size +//CHECK-NEXT: {{^}}register long long r1 asm ("esp"); +//CHECK-NEXT: {{^}} ^{{$}} +register long long r1 asm ("esp"); diff --git a/clang/test/SemaCXX/attr-target-version-riscv.cpp b/clang/test/SemaCXX/attr-target-version-riscv.cpp new file mode 100644 index 000000000000000..785a3c6abafe8c3 --- /dev/null +++ b/clang/test/SemaCXX/attr-target-version-riscv.cpp @@ -0,0 +1,113 @@ +// RUN: %clang_cc1 -triple riscv64-linux-gnu -fsyntax-only -verify -fexceptions -fcxx-exceptions %s -std=c++14 + +// expected-warning@+2 {{unsupported 'arch=rv64gcv' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("arch=rv64gcv"))) int fullArchString(void) { return 2; } +// expected-error@+2 {{redefinition of 'fullArchString'}} +// expected-warning@+1 {{unsupported 'arch=default' in the 'target_version' attribute string; 'target_version' attribute ignored}} +__attribute__((target_version("arch=default"))) int fullArchString(void) { return 2; } + +// 
expected-warning@+2 {{unsupported 'mcpu=sifive-u74' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("mcpu=sifive-u74"))) int mcpu(void) { return 2; } +// expected-error@+1 {{redefinition of 'mcpu'}} +__attribute__((target_version("default"))) int mcpu(void) { return 2; } + +// expected-warning@+2 {{unsupported 'mtune=sifive-u74' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("mtune=sifive-u74"))) int mtune(void) { return 2; } +// expected-error@+1 {{redefinition of 'mtune'}} +__attribute__((target_version("default"))) int mtune(void) { return 2; } + +// expected-warning@+2 {{unsupported '' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version(""))) int emptyVersion(void) { return 2; } +// expected-error@+1 {{redefinition of 'emptyVersion'}} +__attribute__((target_version("default"))) int emptyVersion(void) { return 2; } + +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("arch=+c"))) int dupVersion(void) { return 2; } +// expected-error@+1 {{redefinition of 'dupVersion'}} +__attribute__((target_version("arch=+c"))) int dupVersion(void) { return 2; } +__attribute__((target_version("default"))) int dupVersion(void) { return 2; } + +// expected-warning@+2 {{unsupported 'arch=+zicsr' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("arch=+zicsr"))) int UnsupportBitMaskExt(void) { return 2; } +// expected-error@+1 {{redefinition of 'UnsupportBitMaskExt'}} +__attribute__((target_version("default"))) int UnsupportBitMaskExt(void) { return 2; } + +// expected-warning@+2 {{unsupported 
'NotADigit' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("arch=+c;priority=NotADigit"))) int UnsupportPriority(void) { return 2; } +// expected-error@+1 {{redefinition of 'UnsupportPriority'}} +__attribute__((target_version("default"))) int UnsupportPriority(void) { return 2;} + +// expected-warning@+1 {{unsupported 'default;priority=2' in the 'target_version' attribute string; 'target_version' attribute ignored}} +__attribute__((target_version("default;priority=2"))) int UnsupportDefaultPriority(void) { return 2; } + +// expected-warning@+2 {{unsupported 'arch=+c,zbb' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("arch=+c,zbb"))) int WithoutAddSign(void) { return 2;} +// expected-error@+1 {{redefinition of 'WithoutAddSign'}} +__attribute__((target_version("default"))) int WithoutAddSign(void) { return 2; } + +// expected-warning@+2 {{unsupported 'arch=+c;default' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("arch=+c;default"))) int DefaultInVersion(void) { return 2;} +// expected-error@+1 {{redefinition of 'DefaultInVersion'}} +__attribute__((target_version("default"))) int DefaultInVersion(void) { return 2; } + +// expected-warning@+2 {{unsupported '' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("arch=+c;"))) int EmptyVersionAfterSemiColon(void) { return 2;} +// expected-error@+1 {{redefinition of 'EmptyVersionAfterSemiColon'}} +__attribute__((target_version("default"))) int EmptyVersionAfterSemiColon(void) { return 2; } + +// expected-warning@+2 {{unsupported 'arch=+c;arch=+f' in the 'target_version' 
attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("arch=+c;arch=+f"))) int dupArch(void) { return 2; } +// expected-error@+1 {{redefinition of 'dupArch'}} +__attribute__((target_version("default"))) int dupArch(void) { return 2; } + +// expected-warning@+2 {{unsupported 'default;default' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("default;default"))) int dupDefault(void) { return 2;} +// expected-error@+1 {{redefinition of 'dupDefault'}} +__attribute__((target_version("default"))) int dupDefault(void) { return 2; } + +// expected-warning@+2 {{unsupported 'priority=1;priority=2' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("priority=1;priority=2"))) int dupPriority(void) { return 2; } +// expected-error@+1 {{redefinition of 'dupPriority'}} +__attribute__((target_version("default"))) int dupPriority(void) { return 2; } + +// expected-warning@+2 {{unsupported '=1' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("=1"))) int invalidVerson1(void) { return 2; } +// expected-error@+1 {{redefinition of 'invalidVerson1'}} +__attribute__((target_version("default"))) int invalidVerson1(void) { return 2; } + +// expected-warning@+2 {{unsupported '=+v' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("=+v"))) int invalidVerson2(void) { return 2; } +// expected-error@+1 {{redefinition of 'invalidVerson2'}} +__attribute__((target_version("default"))) int invalidVerson2(void) { return 2; } + +// expected-warning@+2 {{unsupported 'v' in 
the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("v"))) int invalidVerson3(void) { return 2; } +// expected-error@+1 {{redefinition of 'invalidVerson3'}} +__attribute__((target_version("default"))) int invalidVerson3(void) { return 2; } + +// expected-warning@+2 {{unsupported '' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version(";"))) int invalidVerson4(void) { return 2; } +// expected-error@+1 {{redefinition of 'invalidVerson4'}} +__attribute__((target_version("default"))) int invalidVerson4(void) { return 2; } + +// expected-warning@+2 {{unsupported 'priority=1' in the 'target_version' attribute string; 'target_version' attribute ignored}} +// expected-note@+1 {{previous definition is here}} +__attribute__((target_version("priority=1"))) int prioriyWithoutArch(void) { return 2; } +// expected-error@+1 {{redefinition of 'prioriyWithoutArch'}} +__attribute__((target_version("default"))) int prioriyWithoutArch(void) { return 2; } diff --git a/clang/test/SemaCXX/attr-target-version.cpp b/clang/test/SemaCXX/attr-target-version.cpp index 14460756db17bfe..2c85f9735a87b16 100644 --- a/clang/test/SemaCXX/attr-target-version.cpp +++ b/clang/test/SemaCXX/attr-target-version.cpp @@ -99,10 +99,10 @@ __attribute__((target_version("jscvt"))) void Decl(); class Out { // #defined-here int __attribute__((target_version("bti"))) func(void); - int __attribute__((target_version("ssbs2"))) func(void); + int __attribute__((target_version("ssbs"))) func(void); }; int __attribute__((target_version("bti"))) Out::func(void) { return 1; } -int __attribute__((target_version("ssbs2"))) Out::func(void) { return 2; } +int __attribute__((target_version("ssbs"))) Out::func(void) { return 2; } // expected-error@+4 {{out-of-line definition of 'func' does not match any 
declaration in 'Out'}} // expected-note@-2 {{member declaration nearly matches}} // expected-note@-4 {{member declaration nearly matches}} diff --git a/clang/test/SemaCXX/pragma-pack-packed-2.cpp b/clang/test/SemaCXX/pragma-pack-packed-2.cpp index 3639addd6fe5fc7..f288dc04313e694 100644 --- a/clang/test/SemaCXX/pragma-pack-packed-2.cpp +++ b/clang/test/SemaCXX/pragma-pack-packed-2.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -triple s390x-ibm-zos -fzos-extensions -fsyntax-only -verify %s // RUN: %clang_cc1 -triple s390x-ibm-zos -fzos-extensions -fxl-pragma-pack -fsyntax-only -verify %s -// RUN: %clang -target s390x-ibm-zos -S -emit-llvm -Xclang -verify -fno-xl-pragma-pack %s +// RUN: %clang -target s390x-ibm-zos -S -emit-llvm -Xclang -verify -fno-xl-pragma-pack -o %t.ll %s #pragma pack(show) // expected-warning {{value of #pragma pack(show) == 8}} #pragma pack(twobyte) diff --git a/clang/test/SemaCXX/warn-assignment-condition.cpp b/clang/test/SemaCXX/warn-assignment-condition.cpp index 09084e36bb49169..65332846bd09111 100644 --- a/clang/test/SemaCXX/warn-assignment-condition.cpp +++ b/clang/test/SemaCXX/warn-assignment-condition.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -Wparentheses -verify %s +// RUN: %clang_cc1 -fsyntax-only -Wparentheses -std=c++2a -verify %s struct A { int foo(); @@ -144,3 +144,48 @@ void test() { f(S()); // expected-note {{in instantiation}} } } + +namespace GH101863 { +void t1(auto... args) { + if (((args == 0) or ...)) { } +} + +template +void t2(Args... args) { + if (((args == 0) or ...)) { } +} + +void t3(auto... args) { + if ((... && (args == 0))) { } +} + +void t4(auto... a, auto... b) { + if (((a == 0) or ...) && ((b == 0) or ...)) { } +} + +void t5(auto... args) { + if ((((args == 0) or ...))) { } +} + +void t6(auto a, auto... b) { + static_assert(__is_same_as(decltype((a)), int&)); + static_assert(__is_same_as(decltype(((b), ...)), int&)); +}; + +void t7(auto... 
args) { + if ((((args == 0)) or ...)) { } // expected-warning {{equality comparison with extraneous parentheses}} \ + // expected-note {{use '=' to turn this equality comparison into an assignment}} \ + // expected-note {{remove extraneous parentheses around the comparison to silence this warning}} +} + +void test() { + t1(0, 1); + t2<>(); + t3(1, 2, 3); + t3(0, 1); + t4(0, 1); + t5(0, 1); + t6(0, 0); + t7(0); // expected-note {{in instantiation of function template specialization 'GH101863::t7' requested here}} +} +} diff --git a/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp b/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp index e1f3ab37ad947cc..aacc092c2c66ca5 100644 --- a/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp +++ b/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp @@ -156,7 +156,13 @@ namespace UsesThis { auto h() -> decltype(this); // expected-error {{'this' cannot be used in a static member function declaration}} }; - template struct A; // expected-note 3{{in instantiation of}} + template struct A; // expected-note {{in instantiation of}} + template<> template<> void A::f(); + template<> template<> void A::g(); + void test1() { + A().f(); // expected-note {{in instantiation of}} + A().g(); // expected-note {{in instantiation of}} + } template struct Foo { @@ -390,7 +396,12 @@ namespace UsesThis { } }; - template struct D; // expected-note 2{{in instantiation of}} + template struct D; + + void test2() { + D().non_static_spec(0); // expected-note {{in instantiation of}} + D().static_spec(0); // expected-note {{in instantiation of}} + } template struct E : T { @@ -574,6 +585,23 @@ namespace UsesThis { } }; - template struct E; // expected-note 2{{in instantiation of}} + template struct E; + void test3() { + E().non_static_spec(0); // expected-note {{in instantiation of}} + E().static_spec(0); // expected-note {{in instantiation of}} + } } + +namespace GH111266 { + template 
struct S { + template auto foo(); + template<> auto foo<1>() { + return [](auto x) { return x; }; + } + }; + template struct S; + void test() { + S().foo<1>(); + } +} // namespace GH111266 diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp index aa8fbb874e15162..9abb8a110bf8ee9 100644 --- a/clang/unittests/Format/ConfigParseTest.cpp +++ b/clang/unittests/Format/ConfigParseTest.cpp @@ -316,6 +316,13 @@ TEST(ConfigParseTest, ParsesConfiguration) { /*AcrossComments=*/false, /*AlignCompound=*/false, \ /*AlignFunctionDeclarations=*/true, \ /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); \ + CHECK_PARSE( \ + #FIELD ": AcrossComments", FIELD, \ + FormatStyle::AlignConsecutiveStyle( \ + {/*Enabled=*/true, /*AcrossEmptyLines=*/false, \ + /*AcrossComments=*/true, /*AlignCompound=*/false, \ + /*AlignFunctionDeclarations=*/true, \ + /*AlignFunctionPointers=*/false, /*PadOperators=*/true})); \ CHECK_PARSE( \ #FIELD ": AcrossEmptyLinesAndComments", FIELD, \ FormatStyle::AlignConsecutiveStyle( \ @@ -339,6 +346,7 @@ TEST(ConfigParseTest, ParsesConfiguration) { CHECK_PARSE_NESTED_BOOL(FIELD, AcrossComments); \ CHECK_PARSE_NESTED_BOOL(FIELD, AlignCompound); \ CHECK_PARSE_NESTED_BOOL(FIELD, AlignFunctionDeclarations); \ + CHECK_PARSE_NESTED_BOOL(FIELD, AlignFunctionPointers); \ CHECK_PARSE_NESTED_BOOL(FIELD, PadOperators); \ } while (false) diff --git a/clang/unittests/Format/FormatTestProto.cpp b/clang/unittests/Format/FormatTestProto.cpp index 30ce57c545ec767..61c10f8412c7d6a 100644 --- a/clang/unittests/Format/FormatTestProto.cpp +++ b/clang/unittests/Format/FormatTestProto.cpp @@ -190,6 +190,7 @@ TEST_F(FormatTestProto, MessageFieldAttributes) { " aaaaaaaaaaaaaaaa: true\n" " }\n" "];"); + verifyFormat("repeated A a = 1 [(annotation).int32.repeated.test = true];"); } TEST_F(FormatTestProto, DoesntWrapFileOptions) { diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 
8e1dcb45f289f8a..82bbd04f97b4f9c 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -953,9 +953,20 @@ Intrinsic::Intrinsic(StringRef Name, StringRef Proto, uint64_t MergeTy, SVEEmitter &Emitter, StringRef SVEGuard, StringRef SMEGuard) : Name(Name.str()), LLVMName(LLVMName), Proto(Proto.str()), - BaseTypeSpec(BT), Class(Class), SVEGuard(SVEGuard.str()), - SMEGuard(SMEGuard.str()), MergeSuffix(MergeSuffix.str()), + BaseTypeSpec(BT), Class(Class), MergeSuffix(MergeSuffix.str()), BaseType(BT, 'd'), Flags(Flags), ImmChecks(Checks) { + + auto FormatGuard = [](StringRef Guard, StringRef Base) -> std::string { + if (Guard.contains('|')) + return Base.str() + ",(" + Guard.str() + ")"; + if (Guard.empty() || Guard == Base || Guard.starts_with(Base.str() + ",")) + return Guard.str(); + return Base.str() + "," + Guard.str(); + }; + + this->SVEGuard = FormatGuard(SVEGuard, "sve"); + this->SMEGuard = FormatGuard(SMEGuard, "sme"); + // Types[0] is the return value. 
for (unsigned I = 0; I < (getNumParams() + 1); ++I) { char Mod; diff --git a/compiler-rt/include/sanitizer/ubsan_interface.h b/compiler-rt/include/sanitizer/ubsan_interface.h index 435eb1ae332cad1..30a7fd875043db5 100644 --- a/compiler-rt/include/sanitizer/ubsan_interface.h +++ b/compiler-rt/include/sanitizer/ubsan_interface.h @@ -13,6 +13,8 @@ #ifndef SANITIZER_UBSAN_INTERFACE_H #define SANITIZER_UBSAN_INTERFACE_H +#include + #ifdef __cplusplus extern "C" { #endif diff --git a/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc b/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc index 5c6c3475ed910ba..bb1875fe9f72c8d 100644 --- a/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc +++ b/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc @@ -70,7 +70,7 @@ enum CPUFeatures { FEAT_MEMTAG3, FEAT_SB, FEAT_PREDRES, - FEAT_SSBS, + RESERVED_FEAT_SSBS, // previously used and now ABI legacy FEAT_SSBS2, FEAT_BTI, RESERVED_FEAT_LS64, // previously used and now ABI legacy diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc index da1c4dfcb0c9a82..82478691fcd4157 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc @@ -31,10 +31,6 @@ static bool isKnownAndSupported(const char *name) { } static uint64_t deriveImplicitFeatures(uint64_t features) { - // FEAT_SSBS2 implies FEAT_SSBS - if ((1ULL << FEAT_SSBS2) & features) - features |= (1ULL << FEAT_SSBS); - // FEAT_FP is always enabled features |= (1ULL << FEAT_FP); diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc index 6f6bd19a2120ad0..a9befd7f3e56d7a 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc @@ -43,10 +43,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap, 
setCPUFeature(FEAT_FCMA); if (hwcap & HWCAP_SB) setCPUFeature(FEAT_SB); - if (hwcap & HWCAP_SSBS) { - setCPUFeature(FEAT_SSBS); + if (hwcap & HWCAP_SSBS) setCPUFeature(FEAT_SSBS2); - } if (hwcap2 & HWCAP2_MTE) { setCPUFeature(FEAT_MEMTAG); setCPUFeature(FEAT_MEMTAG2); diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c index dbe6094541f63de..23f8fa3e1fd490d 100644 --- a/compiler-rt/lib/builtins/cpu_model/x86.c +++ b/compiler-rt/lib/builtins/cpu_model/x86.c @@ -649,6 +649,7 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family, CPU = "k8"; break; case 16: + case 18: CPU = "amdfam10"; *Type = AMDFAM10H; // "amdfam10" switch (Model) { diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp index f9741b4fe3509dd..9f4a9a5701636fc 100644 --- a/compiler-rt/lib/rtsan/rtsan.cpp +++ b/compiler-rt/lib/rtsan/rtsan.cpp @@ -75,8 +75,11 @@ static auto OnViolationAction(DiagnosticsInfo info) { handle.inc_use_count_unsafe(); } - if (flags().halt_on_error) + if (flags().halt_on_error) { + if (flags().print_stats_on_exit) + PrintStatisticsSummary(); Die(); + } }; } diff --git a/compiler-rt/lib/rtsan/rtsan_assertions.h b/compiler-rt/lib/rtsan/rtsan_assertions.h index 1a653d198ab88b6..4646e750b6796ef 100644 --- a/compiler-rt/lib/rtsan/rtsan_assertions.h +++ b/compiler-rt/lib/rtsan/rtsan_assertions.h @@ -21,9 +21,8 @@ template void ExpectNotRealtime(Context &context, OnViolationAction &&OnViolation) { CHECK(__rtsan_is_initialized()); if (context.InRealtimeContext() && !context.IsBypassed()) { - context.BypassPush(); + ScopedBypass sb{context}; OnViolation(); - context.BypassPop(); } } diff --git a/compiler-rt/lib/rtsan/rtsan_context.h b/compiler-rt/lib/rtsan/rtsan_context.h index cb0c2eb0a5e0d74..97fd9b48062ece7 100644 --- a/compiler-rt/lib/rtsan/rtsan_context.h +++ b/compiler-rt/lib/rtsan/rtsan_context.h @@ -35,5 +35,22 @@ class Context { int bypass_depth_{0}; }; +class ScopedBypass { +public: + 
[[nodiscard]] explicit ScopedBypass(Context &context) : context_(context) { + context_.BypassPush(); + } + + ~ScopedBypass() { context_.BypassPop(); } + + ScopedBypass(const ScopedBypass &) = delete; + ScopedBypass &operator=(const ScopedBypass &) = delete; + ScopedBypass(ScopedBypass &&) = delete; + ScopedBypass &operator=(ScopedBypass &&) = delete; + +private: + Context &context_; +}; + Context &GetContextForThisThread(); } // namespace __rtsan diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp index c65b1bb01fbe01a..f7a281f13e2ff65 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp @@ -539,7 +539,7 @@ TEST(TestRtsanInterceptors, SpinLockLockDiesWhenRealtime) { TEST(TestRtsanInterceptors, PthreadCondSignalDiesWhenRealtime) { pthread_cond_t cond{}; - pthread_cond_init(&cond, NULL); + ASSERT_EQ(0, pthread_cond_init(&cond, nullptr)); auto Func = [&cond]() { pthread_cond_signal(&cond); }; ExpectRealtimeDeath(Func, "pthread_cond_signal"); @@ -550,7 +550,7 @@ TEST(TestRtsanInterceptors, PthreadCondSignalDiesWhenRealtime) { TEST(TestRtsanInterceptors, PthreadCondBroadcastDiesWhenRealtime) { pthread_cond_t cond{}; - pthread_cond_init(&cond, NULL); + ASSERT_EQ(0, pthread_cond_init(&cond, nullptr)); auto Func = [&cond]() { pthread_cond_broadcast(&cond); }; ExpectRealtimeDeath(Func, "pthread_cond_broadcast"); diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index a5f1bc388e88243..323a8b9d76c994b 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -785,6 +785,9 @@ class Allocator { // A corrupted chunk will not be reported as owned, which is WAI. bool isOwned(const void *Ptr) { initThreadMaybe(); + // If the allocation is not owned, the tags could be wrong. 
+ ScopedDisableMemoryTagChecks x( + useMemoryTagging(Primary.Options.load())); #ifdef GWP_ASAN_HOOKS if (GuardedAlloc.pointerIsMine(Ptr)) return true; diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h index 1f6983e99404a2c..83ebe676433ebdc 100644 --- a/compiler-rt/lib/scudo/standalone/memtag.h +++ b/compiler-rt/lib/scudo/standalone/memtag.h @@ -122,9 +122,12 @@ inline NORETURN void enableSystemMemoryTaggingTestOnly() { class ScopedDisableMemoryTagChecks { uptr PrevTCO; + bool active; public: - ScopedDisableMemoryTagChecks() { + ScopedDisableMemoryTagChecks(bool cond = true) : active(cond) { + if (!active) + return; __asm__ __volatile__( R"( .arch_extension memtag @@ -135,6 +138,8 @@ class ScopedDisableMemoryTagChecks { } ~ScopedDisableMemoryTagChecks() { + if (!active) + return; __asm__ __volatile__( R"( .arch_extension memtag @@ -269,7 +274,7 @@ inline NORETURN void enableSystemMemoryTaggingTestOnly() { } struct ScopedDisableMemoryTagChecks { - ScopedDisableMemoryTagChecks() {} + ScopedDisableMemoryTagChecks(UNUSED bool cond = true) {} }; inline NORETURN uptr selectRandomTag(uptr Ptr, uptr ExcludeMask) { diff --git a/compiler-rt/test/rtsan/blocking_call.cpp b/compiler-rt/test/rtsan/blocking_call.cpp index 47ce3d5544cbd6c..5c7f9d331096dea 100644 --- a/compiler-rt/test/rtsan/blocking_call.cpp +++ b/compiler-rt/test/rtsan/blocking_call.cpp @@ -7,28 +7,23 @@ #include #include -// TODO: Remove when [[blocking]] is implemented. -extern "C" void __rtsan_notify_blocking_call(const char *function_name); - -void custom_blocking_function() { - // TODO: When [[blocking]] is implemented, don't call this directly. - __rtsan_notify_blocking_call(__func__); -} - -void safe_call() { - // TODO: When [[blocking]] is implemented, don't call this directly. 
- __rtsan_notify_blocking_call(__func__); +void custom_blocking_function() [[clang::blocking]] { + printf("In blocking function\n"); } -void process() [[clang::nonblocking]] { custom_blocking_function(); } +void realtime_function() [[clang::nonblocking]] { custom_blocking_function(); } +void nonrealtime_function() { custom_blocking_function(); } int main() { - safe_call(); // This shouldn't die, because it isn't in nonblocking context. - process(); + nonrealtime_function(); + realtime_function(); return 0; - // CHECK-NOT: {{.*safe_call*}} - // CHECK: ==ERROR: RealtimeSanitizer: blocking-call - // CHECK-NEXT: Call to blocking function `custom_blocking_function` in real-time context! - // CHECK-NEXT: {{.*custom_blocking_function*}} - // CHECK-NEXT: {{.*process*}} } + +// CHECK: ==ERROR: RealtimeSanitizer: blocking-call +// CHECK-NEXT: Call to blocking function `custom_blocking_function()` in real-time context! +// CHECK-NEXT: {{.*custom_blocking_function*}} +// CHECK-NEXT: {{.*realtime_function*}} + +// should only occur once +// CHECK-NOT: ==ERROR: RealtimeSanitizer: blocking-call diff --git a/compiler-rt/test/rtsan/exit_stats.cpp b/compiler-rt/test/rtsan/exit_stats.cpp index 4341fbb0f9cf211..d4d19ace778ba5a 100644 --- a/compiler-rt/test/rtsan/exit_stats.cpp +++ b/compiler-rt/test/rtsan/exit_stats.cpp @@ -1,5 +1,6 @@ // RUN: %clangxx -fsanitize=realtime %s -o %t -// RUN: env RTSAN_OPTIONS="halt_on_error=false,print_stats_on_exit=true" %run %t 2>&1 | FileCheck %s +// RUN: %env_rtsan_opts="halt_on_error=false,print_stats_on_exit=true" %run %t 2>&1 | FileCheck %s +// RUN: %env_rtsan_opts="halt_on_error=true,print_stats_on_exit=true" not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-HALT // UNSUPPORTED: ios @@ -22,3 +23,7 @@ int main() { // CHECK: RealtimeSanitizer exit stats: // CHECK-NEXT: Total error count: 10 // CHECK-NEXT: Unique error count: 1 + +// CHECK-HALT: RealtimeSanitizer exit stats: +// CHECK-HALT-NEXT: Total error count: 1 +// CHECK-HALT-NEXT: Unique 
error count: 1 diff --git a/flang/include/flang/Common/Fortran-features.h b/flang/include/flang/Common/Fortran-features.h index f813cbae40a57ea..3942a7926286456 100644 --- a/flang/include/flang/Common/Fortran-features.h +++ b/flang/include/flang/Common/Fortran-features.h @@ -72,7 +72,8 @@ ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable, IgnoredIntrinsicFunctionType, PreviousScalarUse, RedeclaredInaccessibleComponent, ImplicitShared, IndexVarRedefinition, IncompatibleImplicitInterfaces, BadTypeForTarget, - VectorSubscriptFinalization, UndefinedFunctionResult, UselessIomsg) + VectorSubscriptFinalization, UndefinedFunctionResult, UselessIomsg, + MismatchingDummyProcedure) using LanguageFeatures = EnumSet; using UsageWarnings = EnumSet; diff --git a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp index bf4de78dbdb2138..400a8648dd7e076 100644 --- a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp +++ b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp @@ -344,7 +344,8 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, if (debugLevel == mlir::LLVM::DIEmissionKind::LineTablesOnly) { auto spAttr = mlir::LLVM::DISubprogramAttr::get( context, id, compilationUnit, Scope, funcName, fullName, funcFileAttr, - line, line, subprogramFlags, subTypeAttr, /*retainedNodes=*/{}); + line, line, subprogramFlags, subTypeAttr, /*retainedNodes=*/{}, + /*annotations=*/{}); funcOp->setLoc(builder.getFusedLoc({l}, spAttr)); return; } @@ -368,7 +369,7 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, auto spAttr = mlir::LLVM::DISubprogramAttr::get( context, recId, /*isRecSelf=*/true, id, compilationUnit, Scope, funcName, fullName, funcFileAttr, line, line, subprogramFlags, subTypeAttr, - /*retainedNodes=*/{}); + /*retainedNodes=*/{}, /*annotations=*/{}); // There is no direct information in the IR for any 'use' statement in the // function. We have to extract that information from the DeclareOp. 
We do @@ -401,7 +402,7 @@ void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp, spAttr = mlir::LLVM::DISubprogramAttr::get( context, recId, /*isRecSelf=*/false, id2, compilationUnit, Scope, funcName, fullName, funcFileAttr, line, line, subprogramFlags, - subTypeAttr, entities); + subTypeAttr, entities, /*annotations=*/{}); funcOp->setLoc(builder.getFusedLoc({l}, spAttr)); funcOp.walk([&](fir::cg::XDeclareOp declOp) { diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 28a12a5798cb05b..fa2d59da10f8276 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -265,7 +265,7 @@ static void ConvertIntegerActual(evaluate::Expr &actual, if (!semanticsContext.IsEnabled( common::LanguageFeature::ActualIntegerConvertedToSmallerKind)) { messages.Say( - "Actual argument scalar expression of type INTEGER(%d) cannot beimplicitly converted to smaller dummy argument type INTEGER(%d)"_err_en_US, + "Actual argument scalar expression of type INTEGER(%d) cannot be implicitly converted to smaller dummy argument type INTEGER(%d)"_err_en_US, actualType.type().kind(), dummyType.type().kind()); } else if (semanticsContext.ShouldWarn(common::LanguageFeature:: ActualIntegerConvertedToSmallerKind)) { @@ -300,12 +300,15 @@ static void ConvertLogicalActual(evaluate::Expr &actual, } static bool DefersSameTypeParameters( - const DerivedTypeSpec &actual, const DerivedTypeSpec &dummy) { - for (const auto &pair : actual.parameters()) { - const ParamValue &actualValue{pair.second}; - const ParamValue *dummyValue{dummy.FindParameter(pair.first)}; - if (!dummyValue || (actualValue.isDeferred() != dummyValue->isDeferred())) { - return false; + const DerivedTypeSpec *actual, const DerivedTypeSpec *dummy) { + if (actual && dummy) { + for (const auto &pair : actual->parameters()) { + const ParamValue &actualValue{pair.second}; + const ParamValue *dummyValue{dummy->FindParameter(pair.first)}; + if (!dummyValue || + 
(actualValue.isDeferred() != dummyValue->isDeferred())) { + return false; + } } } return true; @@ -370,9 +373,37 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, } bool dummyIsAssumedRank{dummy.type.attrs().test( characteristics::TypeAndShape::Attr::AssumedRank)}; + bool actualIsAssumedSize{actualType.attrs().test( + characteristics::TypeAndShape::Attr::AssumedSize)}; + bool actualIsAssumedRank{evaluate::IsAssumedRank(actual)}; + bool actualIsPointer{evaluate::IsObjectPointer(actual)}; + bool actualIsAllocatable{evaluate::IsAllocatableDesignator(actual)}; + bool actualMayBeAssumedSize{actualIsAssumedSize || + (actualIsAssumedRank && !actualIsPointer && !actualIsAllocatable)}; + bool actualIsPolymorphic{actualType.type().IsPolymorphic()}; + const auto *actualDerived{evaluate::GetDerivedTypeSpec(actualType.type())}; if (typesCompatible) { if (isElemental) { } else if (dummyIsAssumedRank) { + if (actualMayBeAssumedSize && dummy.intent == common::Intent::Out) { + // An INTENT(OUT) dummy might be a no-op at run time + bool dummyHasSignificantIntentOut{actualIsPolymorphic || + (actualDerived && + (actualDerived->HasDefaultInitialization( + /*ignoreAllocatable=*/false, /*ignorePointer=*/true) || + actualDerived->HasDestruction()))}; + const char *actualDesc{ + actualIsAssumedSize ? 
"Assumed-size" : "Assumed-rank"}; + if (dummyHasSignificantIntentOut) { + messages.Say( + "%s actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization"_err_en_US, + actualDesc); + } else { + context.Warn(common::UsageWarning::Portability, messages.at(), + "%s actual argument should not be associated with INTENT(OUT) assumed-rank dummy argument"_port_en_US, + actualDesc); + } + } } else if (dummy.ignoreTKR.test(common::IgnoreTKR::Rank)) { } else if (dummyRank > 0 && !dummyIsAllocatableOrPointer && !dummy.type.attrs().test( @@ -401,11 +432,7 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, dummy.type.type().AsFortran()); } - bool actualIsPolymorphic{actualType.type().IsPolymorphic()}; - bool dummyIsPolymorphic{dummy.type.type().IsPolymorphic()}; bool actualIsCoindexed{ExtractCoarrayRef(actual).has_value()}; - bool actualIsAssumedSize{actualType.attrs().test( - characteristics::TypeAndShape::Attr::AssumedSize)}; bool dummyIsAssumedSize{dummy.type.attrs().test( characteristics::TypeAndShape::Attr::AssumedSize)}; bool dummyIsAsynchronous{ @@ -414,7 +441,7 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, dummy.attrs.test(characteristics::DummyDataObject::Attr::Volatile)}; bool dummyIsValue{ dummy.attrs.test(characteristics::DummyDataObject::Attr::Value)}; - + bool dummyIsPolymorphic{dummy.type.type().IsPolymorphic()}; if (actualIsPolymorphic && dummyIsPolymorphic && actualIsCoindexed) { // 15.5.2.4(2) messages.Say( @@ -434,37 +461,36 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, actualFirstSymbol && actualFirstSymbol->attrs().test(Attr::ASYNCHRONOUS)}; bool actualIsVolatile{ actualFirstSymbol && actualFirstSymbol->attrs().test(Attr::VOLATILE)}; - const auto *derived{evaluate::GetDerivedTypeSpec(actualType.type())}; - if (derived && !derived->IsVectorType()) { + if (actualDerived && 
!actualDerived->IsVectorType()) { if (dummy.type.type().IsAssumedType()) { - if (!derived->parameters().empty()) { // 15.5.2.4(2) + if (!actualDerived->parameters().empty()) { // 15.5.2.4(2) messages.Say( "Actual argument associated with TYPE(*) %s may not have a parameterized derived type"_err_en_US, dummyName); } if (const Symbol * - tbp{FindImmediateComponent(*derived, [](const Symbol &symbol) { + tbp{FindImmediateComponent(*actualDerived, [](const Symbol &symbol) { return symbol.has(); })}) { // 15.5.2.4(2) evaluate::SayWithDeclaration(messages, *tbp, "Actual argument associated with TYPE(*) %s may not have type-bound procedure '%s'"_err_en_US, dummyName, tbp->name()); } - auto finals{FinalsForDerivedTypeInstantiation(*derived)}; + auto finals{FinalsForDerivedTypeInstantiation(*actualDerived)}; if (!finals.empty()) { // 15.5.2.4(2) SourceName name{finals.front()->name()}; if (auto *msg{messages.Say( "Actual argument associated with TYPE(*) %s may not have derived type '%s' with FINAL subroutine '%s'"_err_en_US, - dummyName, derived->typeSymbol().name(), name)}) { + dummyName, actualDerived->typeSymbol().name(), name)}) { msg->Attach(name, "FINAL subroutine '%s' in derived type '%s'"_en_US, - name, derived->typeSymbol().name()); + name, actualDerived->typeSymbol().name()); } } } if (actualIsCoindexed) { if (dummy.intent != common::Intent::In && !dummyIsValue) { - if (auto bad{ - FindAllocatableUltimateComponent(*derived)}) { // 15.5.2.4(6) + if (auto bad{FindAllocatableUltimateComponent( + *actualDerived)}) { // 15.5.2.4(6) evaluate::SayWithDeclaration(messages, *bad, "Coindexed actual argument with ALLOCATABLE ultimate component '%s' must be associated with a %s with VALUE or INTENT(IN) attributes"_err_en_US, bad.BuildResultDesignatorName(), dummyName); @@ -484,7 +510,7 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, } } if (actualIsVolatile != dummyIsVolatile) { // 15.5.2.4(22) - if (auto 
bad{semantics::FindCoarrayUltimateComponent(*derived)}) { + if (auto bad{semantics::FindCoarrayUltimateComponent(*actualDerived)}) { evaluate::SayWithDeclaration(messages, *bad, "VOLATILE attribute must match for %s when actual argument has a coarray ultimate component '%s'"_err_en_US, dummyName, bad.BuildResultDesignatorName()); @@ -501,8 +527,6 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, ? actualLastSymbol->detailsIf() : nullptr}; int actualRank{actualType.Rank()}; - bool actualIsPointer{evaluate::IsObjectPointer(actual)}; - bool actualIsAssumedRank{evaluate::IsAssumedRank(actual)}; if (dummy.type.attrs().test( characteristics::TypeAndShape::Attr::AssumedShape)) { // 15.5.2.4(16) @@ -730,7 +754,6 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, } // 15.5.2.6 -- dummy is ALLOCATABLE - bool actualIsAllocatable{evaluate::IsAllocatableDesignator(actual)}; bool dummyIsOptional{ dummy.attrs.test(characteristics::DummyDataObject::Attr::Optional)}; bool actualIsNull{evaluate::IsNullPointer(actual)}; @@ -851,10 +874,8 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, } } // 15.5.2.5(4) - const auto *derived{evaluate::GetDerivedTypeSpec(actualType.type())}; - if ((derived && - !DefersSameTypeParameters(*derived, - *evaluate::GetDerivedTypeSpec(dummy.type.type()))) || + const auto *dummyDerived{evaluate::GetDerivedTypeSpec(dummy.type.type())}; + if (!DefersSameTypeParameters(actualDerived, dummyDerived) || dummy.type.type().HasDeferredTypeParameter() != actualType.type().HasDeferredTypeParameter()) { messages.Say( diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 7778561fb5bd331..f8e873008ceabc0 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -3765,12 +3765,20 @@ void SubprogramMatchHelper::CheckDummyDataObject(const Symbol &symbol1, void 
SubprogramMatchHelper::CheckDummyProcedure(const Symbol &symbol1, const Symbol &symbol2, const DummyProcedure &proc1, const DummyProcedure &proc2) { + std::string whyNot; if (!CheckSameIntent(symbol1, symbol2, proc1.intent, proc2.intent)) { } else if (!CheckSameAttrs(symbol1, symbol2, proc1.attrs, proc2.attrs)) { - } else if (proc1 != proc2) { + } else if (!proc2.IsCompatibleWith(proc1, &whyNot)) { Say(symbol1, symbol2, - "Dummy procedure '%s' does not match the corresponding argument in" - " the interface body"_err_en_US); + "Dummy procedure '%s' is not compatible with the corresponding argument in the interface body: %s"_err_en_US, + whyNot); + } else if (proc1 != proc2) { + evaluate::AttachDeclaration( + symbol1.owner().context().Warn( + common::UsageWarning::MismatchingDummyProcedure, + "Dummy procedure '%s' does not exactly match the corresponding argument in the interface body"_warn_en_US, + symbol1.name()), + symbol2); } } diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 3723b28fecef527..4d2a0a607abe897 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -688,7 +688,7 @@ bool IsInitialized(const Symbol &symbol, bool ignoreDataStatements, } else if (IsNamedConstant(symbol)) { return false; } else if (const auto *object{symbol.detailsIf()}) { - if (!object->isDummy() && object->type()) { + if ((!object->isDummy() || IsIntentOut(symbol)) && object->type()) { if (const auto *derived{object->type()->AsDerived()}) { return derived->HasDefaultInitialization( ignoreAllocatable, ignorePointer); @@ -705,7 +705,7 @@ bool IsDestructible(const Symbol &symbol, const Symbol *derivedTypeSymbol) { IsPointer(symbol)) { return false; } else if (const auto *object{symbol.detailsIf()}) { - if (!object->isDummy() && object->type()) { + if ((!object->isDummy() || IsIntentOut(symbol)) && object->type()) { if (const auto *derived{object->type()->AsDerived()}) { return &derived->typeSymbol() != derivedTypeSymbol && 
derived->HasDestruction(); @@ -1649,7 +1649,9 @@ bool HasDefinedIo(common::DefinedIo which, const DerivedTypeSpec &derived, } } } - return false; + // Check for inherited defined I/O + const auto *parentType{derived.typeSymbol().GetParentTypeSpec()}; + return parentType && HasDefinedIo(which, *parentType, scope); } void WarnOnDeferredLengthCharacterScalar(SemanticsContext &context, diff --git a/flang/test/Integration/OpenMP/atomic-update-complex.f90 b/flang/test/Integration/OpenMP/atomic-update-complex.f90 deleted file mode 100644 index 827e84a011f53bd..000000000000000 --- a/flang/test/Integration/OpenMP/atomic-update-complex.f90 +++ /dev/null @@ -1,42 +0,0 @@ -!===----------------------------------------------------------------------===! -! This directory can be used to add Integration tests involving multiple -! stages of the compiler (for eg. from Fortran to LLVM IR). It should not -! contain executable tests. We should only add tests here sparingly and only -! if there is no other way to test. Repeat this message in each test that is -! added to this directory and sub-directories. -!===----------------------------------------------------------------------===! 
- -!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s - -!CHECK: define void @_QQmain() { -!CHECK: %[[X_NEW_VAL:.*]] = alloca { float, float }, align 8 -!CHECK: {{.*}} = alloca { float, float }, i64 1, align 8 -!CHECK: %[[ORIG_VAL:.*]] = alloca { float, float }, i64 1, align 8 -!CHECK: store { float, float } { float 2.000000e+00, float 2.000000e+00 }, ptr %[[ORIG_VAL]], align 4 -!CHECK: br label %entry - -!CHECK: entry: -!CHECK: %[[ATOMIC_TEMP_LOAD:.*]] = alloca { float, float }, align 8 -!CHECK: call void @__atomic_load(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], i32 0) -!CHECK: %[[PHI_NODE_ENTRY_1:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 8 -!CHECK: br label %.atomic.cont - -!CHECK: .atomic.cont -!CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %entry ], [ %{{.*}}, %.atomic.cont ] -!CHECK: %[[VAL_5:.*]] = extractvalue { float, float } %[[VAL_4]], 0 -!CHECK: %[[VAL_6:.*]] = extractvalue { float, float } %[[VAL_4]], 1 -!CHECK: %[[VAL_7:.*]] = fadd contract float %[[VAL_5]], 1.000000e+00 -!CHECK: %[[VAL_8:.*]] = fadd contract float %[[VAL_6]], 1.000000e+00 -!CHECK: %[[VAL_9:.*]] = insertvalue { float, float } undef, float %[[VAL_7]], 0 -!CHECK: %[[VAL_10:.*]] = insertvalue { float, float } %[[VAL_9]], float %[[VAL_8]], 1 -!CHECK: store { float, float } %[[VAL_10]], ptr %[[X_NEW_VAL]], align 4 -!CHECK: %[[VAL_11:.*]] = call i1 @__atomic_compare_exchange(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], ptr %[[X_NEW_VAL]], i32 2, i32 2) -!CHECK: %[[VAL_12:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 4 -!CHECK: br i1 %[[VAL_11]], label %.atomic.exit, label %.atomic.cont -program main - complex*8 ia, ib - ia = (2, 2) - !$omp atomic update - ia = ia + (1, 1) - !$omp end atomic -end program diff --git a/flang/test/Semantics/call42.f90 b/flang/test/Semantics/call42.f90 new file mode 100644 index 000000000000000..2d5303b58cb0180 --- /dev/null +++ b/flang/test/Semantics/call42.f90 @@ -0,0 +1,138 
@@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic +module m + type boring + end type + type hasAlloc + real, allocatable :: x + end type + type hasInit + real :: x = 1. + end type + type hasFinal + contains + final final + end type + contains + elemental subroutine final(x) + type(hasFinal), intent(in out) :: x + end + + recursive subroutine typeOutAssumedRank(a,b,c,d) + type(boring), intent(out) :: a(..) + type(hasAlloc), intent(out) :: b(..) + type(hasInit), intent(out) :: c(..) + type(hasFinal), intent(out) :: d(..) + !PORTABILITY: Assumed-rank actual argument should not be associated with INTENT(OUT) assumed-rank dummy argument + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + call typeOutAssumedRank(a, b, c, d) + !PORTABILITY: Assumed-rank actual argument should not be associated with INTENT(OUT) assumed-rank dummy argument + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + call classOutAssumedRank(a, b, c, d) + !PORTABILITY: Assumed-rank actual argument should not be associated with INTENT(OUT) assumed-rank dummy argument + !ERROR: Assumed-rank actual argument may not be associated with 
INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + call unlimitedOutAssumedRank(a, b, c, d) + end + recursive subroutine typeOutAssumedRankAlloc(a,b,c,d) + type(boring), intent(out), allocatable :: a(..) + type(hasAlloc), intent(out), allocatable :: b(..) + type(hasInit), intent(out), allocatable :: c(..) + type(hasFinal), intent(out), allocatable :: d(..) + call typeOutAssumedRank(a, b, c, d) + call typeOutAssumedRankAlloc(a, b, c, d) + end + recursive subroutine classOutAssumedRank(a,b,c,d) + class(boring), intent(out) :: a(..) + class(hasAlloc), intent(out) :: b(..) + class(hasInit), intent(out) :: c(..) + class(hasFinal), intent(out) :: d(..) 
+ !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + call typeOutAssumedRank(a, b, c, d) + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + call classOutAssumedRank(a, b, c, d) + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or 
initialization + call unlimitedOutAssumedRank(a, b, c, d) + end + recursive subroutine classOutAssumedRankAlloc(a,b,c,d) + class(boring), intent(out), allocatable :: a(..) + class(hasAlloc), intent(out), allocatable :: b(..) + class(hasInit), intent(out), allocatable :: c(..) + class(hasFinal), intent(out), allocatable :: d(..) + call classOutAssumedRank(a, b, c, d) + call classOutAssumedRankAlloc(a, b, c, d) + call unlimitedOutAssumedRank(a, b, c, d) + end + recursive subroutine unlimitedOutAssumedRank(a,b,c,d) + class(*), intent(out) :: a(..), b(..), c(..), d(..) + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-rank actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + call unlimitedOutAssumedRank(a, b, c, d) + end + recursive subroutine unlimitedOutAssumedRankAlloc(a,b,c,d) + class(*), intent(out), allocatable :: a(..), b(..), c(..), d(..) 
+ call unlimitedOutAssumedRank(a, b, c, d) + call unlimitedOutAssumedRankAlloc(a, b, c, d) + end + + subroutine typeAssumedSize(a,b,c,d) + type(boring) a(*) + type(hasAlloc) b(*) + type(hasInit) c(*) + type(hasFinal) d(*) + !PORTABILITY: Assumed-size actual argument should not be associated with INTENT(OUT) assumed-rank dummy argument + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + call typeOutAssumedRank(a,b,c,d) + !PORTABILITY: Assumed-size actual argument should not be associated with INTENT(OUT) assumed-rank dummy argument + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + call classOutAssumedRank(a,b,c,d) + !PORTABILITY: Assumed-size actual argument should not be associated with INTENT(OUT) assumed-rank dummy argument + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with 
INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + call unlimitedOutAssumedRank(a,b,c,d) + end + subroutine classAssumedSize(a,b,c,d) + class(boring) a(*) + class(hasAlloc) b(*) + class(hasInit) c(*) + class(hasFinal) d(*) + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + call classOutAssumedRank(a,b,c,d) + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + call unlimitedOutAssumedRank(a,b,c,d) + end + subroutine unlimitedAssumedSize(a,b,c,d) + class(*) a(*), b(*), c(*), d(*) + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring 
finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + !ERROR: Assumed-size actual argument may not be associated with INTENT(OUT) assumed-rank dummy argument requiring finalization, destruction, or initialization + call unlimitedOutAssumedRank(a, b, c, d) + end +end diff --git a/flang/test/Semantics/io14.f90 b/flang/test/Semantics/io14.f90 index 6dd6763bc944b93..39f91f5bd2752bb 100644 --- a/flang/test/Semantics/io14.f90 +++ b/flang/test/Semantics/io14.f90 @@ -9,6 +9,8 @@ module m procedure :: fwrite generic :: write(formatted) => fwrite end type + type, extends(t) :: t2 + end type contains subroutine fwrite(x, unit, iotype, vlist, iostat, iomsg) class(t), intent(in) :: x @@ -19,19 +21,16 @@ subroutine fwrite(x, unit, iotype, vlist, iostat, iomsg) character(*), intent(in out) :: iomsg write(unit, *, iostat=iostat, iomsg=iomsg) '(', iotype, ':', vlist, ':', x%n, ')' end subroutine - subroutine subr(x, y, z) + subroutine subr(x, y, z, w) class(t), intent(in) :: x class(base), intent(in) :: y class(*), intent(in) :: z + class(t2), intent(in) :: w print *, x ! ok + print *, w ! ok !ERROR: Derived type 'base' in I/O may not be polymorphic unless using defined I/O print *, y !ERROR: I/O list item may not be unlimited polymorphic print *, z end subroutine end - -program main - use m - call subr(t(123),t(234),t(345)) -end diff --git a/flang/test/Semantics/separate-mp02.f90 b/flang/test/Semantics/separate-mp02.f90 index c63ab6f41a13266..cb1e2687bad7363 100644 --- a/flang/test/Semantics/separate-mp02.f90 +++ b/flang/test/Semantics/separate-mp02.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/test_errors.py %s %flang_fc1 +! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic ! When a module subprogram has the MODULE prefix the following must match ! 
with the corresponding separate module procedure interface body: @@ -238,7 +238,7 @@ module subroutine s1(x) procedure(s_real2) :: x end module subroutine s2(x) - !ERROR: Dummy procedure 'x' does not match the corresponding argument in the interface body + !ERROR: Dummy procedure 'x' is not compatible with the corresponding argument in the interface body: incompatible dummy procedure interfaces: incompatible dummy argument #1: incompatible dummy data object types: INTEGER(4) vs REAL(4) procedure(s_integer) :: x end end @@ -357,3 +357,19 @@ module character(2) function f() module character(3) function f() end function end submodule + +module m11 + interface + module subroutine s(x) + ! The subroutine/function distinction is not known. + external x + end + end interface +end +submodule(m11) sm11 + contains + !WARNING: Dummy procedure 'x' does not exactly match the corresponding argument in the interface body + module subroutine s(x) + call x ! no error + end +end diff --git a/libc/src/stdlib/qsort_data.h b/libc/src/stdlib/qsort_data.h index db045332708ae6e..c529d55ca46ffde 100644 --- a/libc/src/stdlib/qsort_data.h +++ b/libc/src/stdlib/qsort_data.h @@ -89,9 +89,15 @@ class Array { size_t size() const { return array_size; } // Make an Array starting at index |i| and size |s|. - Array make_array(size_t i, size_t s) const { + LIBC_INLINE Array make_array(size_t i, size_t s) const { return Array(get(i), s, elem_size, compare); } + + // Reset this Array to point at a different interval of the same items. + LIBC_INLINE void reset_bounds(uint8_t *a, size_t s) { + array = a; + array_size = s; + } }; using SortingRoutine = void(const Array &); diff --git a/libc/src/stdlib/quick_sort.h b/libc/src/stdlib/quick_sort.h index 89ec107161e3e5a..82b90a7d511d999 100644 --- a/libc/src/stdlib/quick_sort.h +++ b/libc/src/stdlib/quick_sort.h @@ -19,7 +19,7 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { // A simple quicksort implementation using the Hoare partition scheme. 
-static size_t partition(const Array &array) { +LIBC_INLINE size_t partition(const Array &array) { const size_t array_size = array.size(); size_t pivot_index = array_size / 2; uint8_t *pivot = array.get(pivot_index); @@ -59,17 +59,32 @@ static size_t partition(const Array &array) { } } -LIBC_INLINE void quick_sort(const Array &array) { - const size_t array_size = array.size(); - if (array_size <= 1) - return; - size_t split_index = partition(array); - if (array_size <= 2) { - // The partition operation sorts the two element array. - return; +LIBC_INLINE void quick_sort(Array array) { + while (true) { + const size_t array_size = array.size(); + if (array_size <= 1) + return; + size_t split_index = partition(array); + if (array_size == 2) + // The partition operation sorts the two element array. + return; + + // Make Arrays describing the two sublists that still need sorting. + Array left = array.make_array(0, split_index); + Array right = array.make_array(split_index, array.size() - split_index); + + // Recurse to sort the smaller of the two, and then loop round within this + // function to sort the larger. This way, recursive call depth is bounded + // by log2 of the total array size, because every recursive call is sorting + // a list at most half the length of the one in its caller. 
+ if (left.size() < right.size()) { + quick_sort(left); + array.reset_bounds(right.get(0), right.size()); + } else { + quick_sort(right); + array.reset_bounds(left.get(0), left.size()); + } } - quick_sort(array.make_array(0, split_index)); - quick_sort(array.make_array(split_index, array.size() - split_index)); } } // namespace internal diff --git a/libclc/README.TXT b/libclc/README.TXT index 57b5242b9bbecbc..c55a3e441638daf 100644 --- a/libclc/README.TXT +++ b/libclc/README.TXT @@ -1,7 +1,7 @@ libclc ------ -libclc is an open source, BSD licensed implementation of the library +libclc is an open source implementation of the library requirements of the OpenCL C programming language, as specified by the OpenCL 1.1 Specification. The following sections of the specification impose library requirements: diff --git a/libcxx/docs/UserDocumentation.rst b/libcxx/docs/UserDocumentation.rst index 6659fa54f49df58..f5e55994aa75727 100644 --- a/libcxx/docs/UserDocumentation.rst +++ b/libcxx/docs/UserDocumentation.rst @@ -317,6 +317,15 @@ Unpoisoning may not be an option, if (for example) you are not maintaining the a * You are using allocator, which does not call destructor during deallocation. * You are aware that memory allocated with an allocator may be accessed, even when unused by container. +Support for compiler extensions +------------------------------- + +Clang, GCC and other compilers all provide their own set of language extensions. These extensions +have often been developed without particular consideration for their interaction with the library, +and as such, libc++ does not go out of its way to support them. The library may support specific +compiler extensions which would then be documented explicitly, but the basic expectation should be +that no special support is provided for arbitrary compiler extensions. 
+ Platform specific behavior ========================== diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index 229963b38f52b38..e040f15acc3daeb 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -371,14 +371,8 @@ bootstrapping-build) -DLLVM_LIT_ARGS="-sv --xunit-xml-output test-results.xml --timeout=1500 --time-tests" echo "+++ Running the LLDB libc++ data formatter tests" - ${NINJA} -vC "${BUILD_DIR}" check-lldb-api-functionalities-data-formatter-data-formatter-stl-libcxx \ - check-lldb-api-functionalities-data-formatter-data-formatter-stl-generic \ - check-lldb-api-functionalities-data-formatter-data-formatter-stl-libcxx-simulators \ - check-lldb-api-commands-expression-import-std-module \ - check-lldb-api-lang-cpp-std-function-step-into-callable \ - check-lldb-api-lang-cpp-std-function-recognizer \ - check-lldb-api-lang-cpp-std-invoke-recognizer - + ${NINJA} -vC "${BUILD_DIR}" lldb-api-test-deps + ${BUILD_DIR}/bin/llvm-lit -sv --param dotest-args='--category libc++' "${MONOREPO_ROOT}/lldb/test/API" echo "--- Running the libc++ and libc++abi tests" ${NINJA} -vC "${BUILD_DIR}" check-runtimes diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp index 5c881bc01c663d5..da73fe7763ceeaa 100644 --- a/lld/COFF/LTO.cpp +++ b/lld/COFF/LTO.cpp @@ -118,6 +118,7 @@ BitcodeCompiler::BitcodeCompiler(COFFLinkerContext &c) : ctx(c) { if (ctx.config.thinLTOIndexOnly) { auto OnIndexWrite = [&](StringRef S) { thinIndices.erase(S); }; backend = lto::createWriteIndexesThinBackend( + llvm::hardware_concurrency(ctx.config.thinLTOJobs), std::string(ctx.config.thinLTOPrefixReplaceOld), std::string(ctx.config.thinLTOPrefixReplaceNew), std::string(ctx.config.thinLTOPrefixReplaceNativeObject), diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 1368e209c8cfd16..f595504e621181e 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -32,7 +32,7 @@ uint64_t elf::getAArch64Page(uint64_t expr) { // Target 
Identification has been enabled. As linker generated branches are // via x16 the BTI landing pads are defined as: BTI C, BTI J, BTI JC, PACIASP, // PACIBSP. -bool elf::isAArch64BTILandingPad(Symbol &s, int64_t a) { +bool elf::isAArch64BTILandingPad(Ctx &ctx, Symbol &s, int64_t a) { // PLT entries accessed indirectly have a BTI c. if (s.isInPlt(ctx)) return true; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 68cce62188b0fe5..0885815a22a14a5 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -625,27 +625,40 @@ static uint64_t getARMStaticBase(const Symbol &sym) { // points the corresponding R_RISCV_PCREL_HI20 relocation, and the target VA // is calculated using PCREL_HI20's symbol. // -// This function returns the R_RISCV_PCREL_HI20 relocation from -// R_RISCV_PCREL_LO12's symbol and addend. -static Relocation *getRISCVPCRelHi20(const Symbol *sym, uint64_t addend) { +// This function returns the R_RISCV_PCREL_HI20 relocation from the +// R_RISCV_PCREL_LO12 relocation. 
+static Relocation *getRISCVPCRelHi20(const InputSectionBase *loSec, + const Relocation &loReloc) { + uint64_t addend = loReloc.addend; + Symbol *sym = loReloc.sym; + const Defined *d = cast(sym); if (!d->section) { - errorOrWarn("R_RISCV_PCREL_LO12 relocation points to an absolute symbol: " + - sym->getName()); + errorOrWarn( + loSec->getLocation(loReloc.offset) + + ": R_RISCV_PCREL_LO12 relocation points to an absolute symbol: " + + sym->getName()); return nullptr; } - InputSection *isec = cast(d->section); + InputSection *hiSec = cast(d->section); + + if (hiSec != loSec) + errorOrWarn(loSec->getLocation(loReloc.offset) + + ": R_RISCV_PCREL_LO12 relocation points to a symbol '" + + sym->getName() + "' in a different section '" + hiSec->name + + "'"); if (addend != 0) - warn("non-zero addend in R_RISCV_PCREL_LO12 relocation to " + - isec->getObjMsg(d->value) + " is ignored"); + warn(loSec->getLocation(loReloc.offset) + + ": non-zero addend in R_RISCV_PCREL_LO12 relocation to " + + hiSec->getObjMsg(d->value) + " is ignored"); // Relocations are sorted by offset, so we can use std::equal_range to do // binary search. 
- Relocation r; - r.offset = d->value; + Relocation hiReloc; + hiReloc.offset = d->value; auto range = - std::equal_range(isec->relocs().begin(), isec->relocs().end(), r, + std::equal_range(hiSec->relocs().begin(), hiSec->relocs().end(), hiReloc, [](const Relocation &lhs, const Relocation &rhs) { return lhs.offset < rhs.offset; }); @@ -655,8 +668,9 @@ static Relocation *getRISCVPCRelHi20(const Symbol *sym, uint64_t addend) { it->type == R_RISCV_TLS_GD_HI20 || it->type == R_RISCV_TLS_GOT_HI20) return &*it; - errorOrWarn("R_RISCV_PCREL_LO12 relocation points to " + - isec->getObjMsg(d->value) + + errorOrWarn(loSec->getLocation(loReloc.offset) + + ": R_RISCV_PCREL_LO12 relocation points to " + + hiSec->getObjMsg(d->value) + " without an associated R_RISCV_PCREL_HI20 relocation"); return nullptr; } @@ -825,7 +839,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, return getAArch64Page(val) - getAArch64Page(p); } case R_RISCV_PC_INDIRECT: { - if (const Relocation *hiRel = getRISCVPCRelHi20(r.sym, a)) + if (const Relocation *hiRel = getRISCVPCRelHi20(this, r)) return getRelocTargetVA(ctx, *hiRel, r.sym->getVA()); return 0; } diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 02b5c09150c65c5..627cbbdce83f37c 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -62,7 +62,7 @@ static lto::Config createConfig(Ctx &ctx) { c.Options.DataSections = true; // Check if basic block sections must be used. - // Allowed values for --lto-basic-block-sections are "all", "labels", + // Allowed values for --lto-basic-block-sections are "all", // "", or none. This is the equivalent // of -fbasic-block-sections= flag in clang. 
if (!ctx.arg.ltoBasicBlockSections.empty()) { @@ -70,7 +70,8 @@ static lto::Config createConfig(Ctx &ctx) { c.Options.BBSections = BasicBlockSection::All; } else if (ctx.arg.ltoBasicBlockSections == "labels") { c.Options.BBAddrMap = true; - c.Options.BBSections = BasicBlockSection::None; + warn("'--lto-basic-block-sections=labels' is deprecated; Please use " + "'--lto-basic-block-address-map' instead"); } else if (ctx.arg.ltoBasicBlockSections == "none") { c.Options.BBSections = BasicBlockSection::None; } else { @@ -179,6 +180,7 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) { auto onIndexWrite = [&](StringRef s) { thinIndices.erase(s); }; if (ctx.arg.thinLTOIndexOnly) { backend = lto::createWriteIndexesThinBackend( + llvm::hardware_concurrency(ctx.arg.thinLTOJobs), std::string(ctx.arg.thinLTOPrefixReplaceOld), std::string(ctx.arg.thinLTOPrefixReplaceNew), std::string(ctx.arg.thinLTOPrefixReplaceNativeObject), diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index f3eb421b494d899..8a415c84cb57429 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -124,20 +124,20 @@ class TargetInfo { bool gotBaseSymInGotPlt = false; static constexpr RelType noneRel = 0; - RelType copyRel; - RelType gotRel; - RelType pltRel; - RelType relativeRel; - RelType iRelativeRel; - RelType symbolicRel; - RelType tlsDescRel; - RelType tlsGotRel; - RelType tlsModuleIndexRel; - RelType tlsOffsetRel; + RelType copyRel = 0; + RelType gotRel = 0; + RelType pltRel = 0; + RelType relativeRel = 0; + RelType iRelativeRel = 0; + RelType symbolicRel = 0; + RelType tlsDescRel = 0; + RelType tlsGotRel = 0; + RelType tlsModuleIndexRel = 0; + RelType tlsOffsetRel = 0; unsigned gotEntrySize = ctx.arg.wordsize; - unsigned pltEntrySize; - unsigned pltHeaderSize; - unsigned ipltEntrySize; + unsigned pltEntrySize = 0; + unsigned pltHeaderSize = 0; + unsigned ipltEntrySize = 0; // At least on x86_64 positions 1 and 2 are used by the first plt entry // to support lazy loading. 
@@ -156,7 +156,7 @@ class TargetInfo { // A 4-byte field corresponding to one or more trap instructions, used to pad // executable OutputSections. - std::array trapInstr; + std::array trapInstr = {}; // Stores the NOP instructions of different sizes for the target and is used // to pad sections that are relaxed. @@ -239,7 +239,7 @@ void writePrefixedInst(Ctx &, uint8_t *loc, uint64_t insn); void addPPC64SaveRestore(Ctx &); uint64_t getPPC64TocBase(Ctx &ctx); uint64_t getAArch64Page(uint64_t expr); -bool isAArch64BTILandingPad(Symbol &s, int64_t a); +bool isAArch64BTILandingPad(Ctx &, Symbol &s, int64_t a); template void writeARMCmseImportLib(Ctx &); uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type); void riscvFinalizeRelax(int passes); diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index 68dc5ceeab26707..19fe25a2688f4a1 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -549,7 +549,7 @@ void Thunk::setOffset(uint64_t newOffset) { } // AArch64 Thunk base class. -static uint64_t getAArch64ThunkDestVA(const Symbol &s, int64_t a) { +static uint64_t getAArch64ThunkDestVA(Ctx &ctx, const Symbol &s, int64_t a) { uint64_t v = s.isInPlt(ctx) ? 
s.getPltVA(ctx) : s.getVA(a); return v; } @@ -557,7 +557,7 @@ static uint64_t getAArch64ThunkDestVA(const Symbol &s, int64_t a) { bool AArch64Thunk::getMayUseShortThunk() { if (!mayUseShortThunk) return false; - uint64_t s = getAArch64ThunkDestVA(destination, addend); + uint64_t s = getAArch64ThunkDestVA(ctx, destination, addend); uint64_t p = getThunkTargetSym()->getVA(); mayUseShortThunk = llvm::isInt<28>(s - p); return mayUseShortThunk; @@ -568,7 +568,7 @@ void AArch64Thunk::writeTo(uint8_t *buf) { writeLong(buf); return; } - uint64_t s = getAArch64ThunkDestVA(destination, addend); + uint64_t s = getAArch64ThunkDestVA(ctx, destination, addend); uint64_t p = getThunkTargetSym()->getVA(); write32(buf, 0x14000000); // b S ctx.target->relocateNoSym(buf, R_AARCH64_CALL26, s - p); @@ -591,8 +591,9 @@ void AArch64ABSLongThunk::writeLong(uint8_t *buf) { // If mayNeedLandingPad is true then destination is an // AArch64BTILandingPadThunk that defines landingPad. assert(!mayNeedLandingPad || landingPad != nullptr); - uint64_t s = mayNeedLandingPad ? landingPad->getVA(0) - : getAArch64ThunkDestVA(destination, addend); + uint64_t s = mayNeedLandingPad + ? landingPad->getVA(0) + : getAArch64ThunkDestVA(ctx, destination, addend); memcpy(buf, data, sizeof(data)); ctx.target->relocateNoSym(buf + 8, R_AARCH64_ABS64, s); } @@ -619,8 +620,9 @@ void AArch64ADRPThunk::writeLong(uint8_t *buf) { // if mayNeedLandingPad is true then destination is an // AArch64BTILandingPadThunk that defines landingPad. assert(!mayNeedLandingPad || landingPad != nullptr); - uint64_t s = mayNeedLandingPad ? landingPad->getVA(0) - : getAArch64ThunkDestVA(destination, addend); + uint64_t s = mayNeedLandingPad + ? 
landingPad->getVA(0) + : getAArch64ThunkDestVA(ctx, destination, addend); uint64_t p = getThunkTargetSym()->getVA(); memcpy(buf, data, sizeof(data)); ctx.target->relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21, @@ -676,7 +678,7 @@ void AArch64BTILandingPadThunk::writeLong(uint8_t *buf) { } // ARM Target Thunks -static uint64_t getARMThunkDestVA(const Symbol &s) { +static uint64_t getARMThunkDestVA(Ctx &ctx, const Symbol &s) { uint64_t v = s.isInPlt(ctx) ? s.getPltVA(ctx) : s.getVA(); return SignExtend64<32>(v); } @@ -686,7 +688,7 @@ static uint64_t getARMThunkDestVA(const Symbol &s) { bool ARMThunk::getMayUseShortThunk() { if (!mayUseShortThunk) return false; - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); if (s & 1) { mayUseShortThunk = false; return false; @@ -703,7 +705,7 @@ void ARMThunk::writeTo(uint8_t *buf) { return; } - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); uint64_t p = getThunkTargetSym()->getVA(); int64_t offset = s - p - 8; write32(buf, 0xea000000); // b S @@ -729,7 +731,7 @@ bool ARMThunk::isCompatibleWith(const InputSection &isec, bool ThumbThunk::getMayUseShortThunk() { if (!mayUseShortThunk || !ctx.arg.armJ1J2BranchEncoding) return false; - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); if ((s & 1) == 0) { mayUseShortThunk = false; return false; @@ -746,7 +748,7 @@ void ThumbThunk::writeTo(uint8_t *buf) { return; } - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); uint64_t p = getThunkTargetSym()->getVA(); int64_t offset = s - p - 4; write16(buf + 0, 0xf000); // b.w S @@ -768,7 +770,7 @@ void ARMV7ABSLongThunk::writeLong(uint8_t *buf) { write32(buf + 0, 0xe300c000); // movw ip,:lower16:S write32(buf + 4, 0xe340c000); // movt ip,:upper16:S write32(buf + 8, 0xe12fff1c); // bx ip - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = 
getARMThunkDestVA(ctx, destination); ctx.target->relocateNoSym(buf, R_ARM_MOVW_ABS_NC, s); ctx.target->relocateNoSym(buf + 4, R_ARM_MOVT_ABS, s); } @@ -785,7 +787,7 @@ void ThumbV7ABSLongThunk::writeLong(uint8_t *buf) { write16(buf + 4, 0xf2c0); // movt ip, :upper16:S write16(buf + 6, 0x0c00); write16(buf + 8, 0x4760); // bx ip - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); ctx.target->relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, s); ctx.target->relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, s); } @@ -801,7 +803,7 @@ void ARMV7PILongThunk::writeLong(uint8_t *buf) { write32(buf + 4, 0xe340c000); // movt ip,:upper16:S - (P + (L1-P) + 8) write32(buf + 8, 0xe08cc00f); // L1: add ip, ip, pc write32(buf + 12, 0xe12fff1c); // bx ip - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); uint64_t p = getThunkTargetSym()->getVA(); int64_t offset = s - p - 16; ctx.target->relocateNoSym(buf, R_ARM_MOVW_PREL_NC, offset); @@ -821,7 +823,7 @@ void ThumbV7PILongThunk::writeLong(uint8_t *buf) { write16(buf + 6, 0x0c00); write16(buf + 8, 0x44fc); // L1: add ip, pc write16(buf + 10, 0x4760); // bx ip - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); uint64_t p = getThunkTargetSym()->getVA() & ~0x1; int64_t offset = s - p - 12; ctx.target->relocateNoSym(buf, R_ARM_THM_MOVW_PREL_NC, offset); @@ -844,7 +846,7 @@ void ThumbV6MABSLongThunk::writeLong(uint8_t *buf) { write16(buf + 4, 0x9001); // str r0, [sp, #4] ; SP + 4 = S write16(buf + 6, 0xbd01); // pop {r0, pc} ; restore r0 and branch to dest write32(buf + 8, 0x00000000); // L1: .word S - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); ctx.target->relocateNoSym(buf + 8, R_ARM_ABS32, s); } @@ -871,7 +873,7 @@ void ThumbV6MABSXOLongThunk::writeLong(uint8_t *buf) { write16(buf + 14, 0x3000); // adds r0, :lower0_7:S write16(buf + 16, 0x9001); // str r0, 
[sp, #4] ; SP + 4 = S write16(buf + 18, 0xbd01); // pop {r0, pc} ; restore r0 and branch to dest - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); ctx.target->relocateNoSym(buf + 2, R_ARM_THM_ALU_ABS_G3, s); ctx.target->relocateNoSym(buf + 6, R_ARM_THM_ALU_ABS_G2_NC, s); ctx.target->relocateNoSym(buf + 10, R_ARM_THM_ALU_ABS_G1_NC, s); @@ -895,7 +897,7 @@ void ThumbV6MPILongThunk::writeLong(uint8_t *buf) { write16(buf + 8, 0x44e7); // L1: add pc, ip ; transfer control write16(buf + 10, 0x46c0); // nop ; pad to 4-byte boundary write32(buf + 12, 0x00000000); // L2: .word S - (P + (L1 - P) + 4) - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); uint64_t p = getThunkTargetSym()->getVA() & ~0x1; ctx.target->relocateNoSym(buf + 12, R_ARM_REL32, s - p - 12); } @@ -912,7 +914,7 @@ void ARMV5LongLdrPcThunk::writeLong(uint8_t *buf) { write32(buf + 0, 0xe51ff004); // ldr pc, [pc,#-4] ; L1 write32(buf + 4, 0x00000000); // L1: .word S ctx.target->relocateNoSym(buf + 4, R_ARM_ABS32, - getARMThunkDestVA(destination)); + getARMThunkDestVA(ctx, destination)); } void ARMV5LongLdrPcThunk::addSymbols(ThunkSection &isec) { @@ -928,7 +930,7 @@ void ARMV4ABSLongBXThunk::writeLong(uint8_t *buf) { write32(buf + 4, 0xe12fff1c); // bx r12 write32(buf + 8, 0x00000000); // L1: .word S ctx.target->relocateNoSym(buf + 8, R_ARM_ABS32, - getARMThunkDestVA(destination)); + getARMThunkDestVA(ctx, destination)); } void ARMV4ABSLongBXThunk::addSymbols(ThunkSection &isec) { @@ -945,7 +947,7 @@ void ThumbV4ABSLongBXThunk::writeLong(uint8_t *buf) { write32(buf + 4, 0xe51ff004); // ldr pc, [pc, #-4] ; L1 write32(buf + 8, 0x00000000); // L1: .word S ctx.target->relocateNoSym(buf + 8, R_ARM_ABS32, - getARMThunkDestVA(destination)); + getARMThunkDestVA(ctx, destination)); } void ThumbV4ABSLongBXThunk::addSymbols(ThunkSection &isec) { @@ -964,7 +966,7 @@ void ThumbV4ABSLongThunk::writeLong(uint8_t *buf) { 
write32(buf + 8, 0xe12fff1c); // bx r12 write32(buf + 12, 0x00000000); // L1: .word S ctx.target->relocateNoSym(buf + 12, R_ARM_ABS32, - getARMThunkDestVA(destination)); + getARMThunkDestVA(ctx, destination)); } void ThumbV4ABSLongThunk::addSymbols(ThunkSection &isec) { @@ -981,7 +983,7 @@ void ARMV4PILongBXThunk::writeLong(uint8_t *buf) { write32(buf + 4, 0xe08fc00c); // L1: add ip, pc, ip write32(buf + 8, 0xe12fff1c); // bx ip write32(buf + 12, 0x00000000); // L2: .word S - (P + (L1 - P) + 8) - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); uint64_t p = getThunkTargetSym()->getVA() & ~0x1; ctx.target->relocateNoSym(buf + 12, R_ARM_REL32, s - p - 12); } @@ -998,7 +1000,7 @@ void ARMV4PILongThunk::writeLong(uint8_t *buf) { write32(buf + 0, 0xe59fc000); // P: ldr ip, [pc] ; L2 write32(buf + 4, 0xe08ff00c); // L1: add pc, pc, r12 write32(buf + 8, 0x00000000); // L2: .word S - (P + (L1 - P) + 8) - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); uint64_t p = getThunkTargetSym()->getVA() & ~0x1; ctx.target->relocateNoSym(buf + 8, R_ARM_REL32, s - p - 12); } @@ -1017,7 +1019,7 @@ void ThumbV4PILongBXThunk::writeLong(uint8_t *buf) { write32(buf + 4, 0xe59fc000); // ldr r12, [pc] ; L2 write32(buf + 8, 0xe08cf00f); // L1: add pc, r12, pc write32(buf + 12, 0x00000000); // L2: .word S - (P + (L1 - P) + 8) - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); uint64_t p = getThunkTargetSym()->getVA() & ~0x1; ctx.target->relocateNoSym(buf + 12, R_ARM_REL32, s - p - 16); } @@ -1038,7 +1040,7 @@ void ThumbV4PILongThunk::writeLong(uint8_t *buf) { write32(buf + 8, 0xe08fc00c); // L1: add ip, pc, ip write32(buf + 12, 0xe12fff1c); // bx ip write32(buf + 16, 0x00000000); // L2: .word S - (P + (L1 - P) + 8) - uint64_t s = getARMThunkDestVA(destination); + uint64_t s = getARMThunkDestVA(ctx, destination); uint64_t p = 
getThunkTargetSym()->getVA() & ~0x1; ctx.target->relocateNoSym(buf + 16, R_ARM_REL32, s - p - 16); } @@ -1366,7 +1368,7 @@ static Thunk *addThunkAArch64(Ctx &ctx, RelType type, Symbol &s, int64_t a) { fatal("unrecognized relocation type"); bool mayNeedLandingPad = (ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) && - !isAArch64BTILandingPad(s, a); + !isAArch64BTILandingPad(ctx, s, a); if (ctx.arg.picThunk) return make(ctx, s, a, mayNeedLandingPad); return make(ctx, s, a, mayNeedLandingPad); diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp index 6527cbb68f24984..28f5290edb58e30 100644 --- a/lld/MachO/LTO.cpp +++ b/lld/MachO/LTO.cpp @@ -87,6 +87,7 @@ BitcodeCompiler::BitcodeCompiler() { auto onIndexWrite = [&](StringRef S) { thinIndices.erase(S); }; if (config->thinLTOIndexOnly) { backend = lto::createWriteIndexesThinBackend( + llvm::hardware_concurrency(config->thinLTOJobs), std::string(config->thinLTOPrefixReplaceOld), std::string(config->thinLTOPrefixReplaceNew), std::string(config->thinLTOPrefixReplaceNativeObject), diff --git a/lld/test/COFF/thinlto-emit-imports.ll b/lld/test/COFF/thinlto-emit-imports.ll index b47a6cea4eb7df8..26af017b17b2c5d 100644 --- a/lld/test/COFF/thinlto-emit-imports.ll +++ b/lld/test/COFF/thinlto-emit-imports.ll @@ -35,7 +35,7 @@ ; RUN: not lld-link -entry:main -thinlto-index-only \ ; RUN: -thinlto-emit-imports-files %t1.obj %t2.obj %t3.obj \ ; RUN: -out:%t4.exe 2>&1 | FileCheck -DMSG=%errc_EACCES %s --check-prefix=ERR -; ERR: cannot open {{.*}}3.obj.imports: [[MSG]] +; ERR: 'cannot open {{.*}}3.obj.imports': [[MSG]] ; Ensure lld doesn't generate import files when thinlto-index-only is not enabled ; RUN: rm -f %t1.obj.imports diff --git a/lld/test/ELF/lto/basic-block-sections.ll b/lld/test/ELF/lto/basic-block-sections.ll index 35b638ac488a354..4b94e9f75e55c00 100644 --- a/lld/test/ELF/lto/basic-block-sections.ll +++ b/lld/test/ELF/lto/basic-block-sections.ll @@ -1,11 +1,14 @@ ; REQUIRES: x86 ; RUN: llvm-as %s -o %t.o +; RUN: 
ld.lld %t.o -o %t --lto-basic-block-sections=labels --lto-O0 2>&1 | FileCheck -check-prefix=LABELSWARN %s ; RUN: ld.lld %t.o -o %t --lto-basic-block-sections=all --lto-O0 --save-temps ; RUN: llvm-readobj -s %t.lto.o | FileCheck --check-prefix=SECNAMES %s ; RUN: ld.lld %t.o -o %t --lto-basic-block-sections=all --lto-unique-basic-block-section-names --lto-O0 --save-temps ; RUN: llvm-readobj -s %t.lto.o | FileCheck --check-prefix=SECNAMES-FULL %s ; RUN: llvm-nm %t | FileCheck --check-prefix=SYMS %s +; LABELSWARN: --lto-basic-block-sections=labels' is deprecated; Please use '--lto-basic-block-address-map' instead + ; SECNAMES: Name: .text.foo {{.*}} ; SECNAMES: Name: .text.foo {{.*}} ; SECNAMES: Name: .text.foo {{.*}} diff --git a/lld/test/ELF/lto/thinlto-cant-write-index.ll b/lld/test/ELF/lto/thinlto-cant-write-index.ll index 286fcddd4238a1e..550305986ecd5b4 100644 --- a/lld/test/ELF/lto/thinlto-cant-write-index.ll +++ b/lld/test/ELF/lto/thinlto-cant-write-index.ll @@ -10,7 +10,7 @@ ; RUN: chmod u-w %t2.o.thinlto.bc ; RUN: not ld.lld --plugin-opt=thinlto-index-only -shared %t1.o %t2.o -o /dev/null 2>&1 | FileCheck -DMSG=%errc_EACCES %s ; RUN: chmod u+w %t2.o.thinlto.bc -; CHECK: cannot open {{.*}}2.o.thinlto.bc: [[MSG]] +; CHECK: 'cannot open {{.*}}2.o.thinlto.bc': [[MSG]] target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/lld/test/ELF/lto/thinlto-emit-imports.ll b/lld/test/ELF/lto/thinlto-emit-imports.ll index 253ec08619c982a..1807a3b59d81cb1 100644 --- a/lld/test/ELF/lto/thinlto-emit-imports.ll +++ b/lld/test/ELF/lto/thinlto-emit-imports.ll @@ -10,7 +10,7 @@ ; RUN: touch %t3.o.imports ; RUN: chmod 400 %t3.o.imports ; RUN: not ld.lld --plugin-opt=thinlto-index-only --plugin-opt=thinlto-emit-imports-files -shared %t1.o %t2.o %t3.o -o /dev/null 2>&1 | FileCheck -DMSG=%errc_EACCES %s --check-prefix=ERR -; ERR: cannot open {{.*}}3.o.imports: [[MSG]] +; ERR: 'cannot open 
{{.*}}3.o.imports': [[MSG]] ; RUN: rm -f %t1.o.imports %t2.o.imports rm -f %t3.o.imports ; RUN: ld.lld --plugin-opt=thinlto-emit-imports-files -shared %t1.o %t2.o %t3.o -o %t4 diff --git a/lld/test/ELF/riscv-pcrel-hilo-error-sections.s b/lld/test/ELF/riscv-pcrel-hilo-error-sections.s new file mode 100644 index 000000000000000..ebdefd357d722db --- /dev/null +++ b/lld/test/ELF/riscv-pcrel-hilo-error-sections.s @@ -0,0 +1,32 @@ +# REQUIRES: riscv + +# RUN: llvm-mc -filetype=obj -triple=riscv64 %s -o %t.o +# RUN: not ld.lld %t.o 2>&1 | FileCheck %s + +# CHECK: error: {{.*}}:(.text.sec_one+0x0): R_RISCV_PCREL_LO12 relocation points to a symbol '.Lpcrel_hi0' in a different section '.text.sec_two' +# CHECK: error: {{.*}}:(.text.sec_one+0x4): R_RISCV_PCREL_LO12 relocation points to a symbol '.Lpcrel_hi1' in a different section '.text.sec_two' +# CHECK-NOT: R_RISCV_PCREL_LO12 relocation points to a symbol '.Lpcrel_hi2' + +## This test is checking that we warn the user when the relocations in their +## object don't follow the RISC-V psABI. In particular, the psABI requires +## that PCREL_LO12 relocations are in the same section as the pcrel_hi +## instruction they point to. 
+ + .section .text.sec_one,"ax" + addi a0, a0, %pcrel_lo(.Lpcrel_hi0) + sw a0, %pcrel_lo(.Lpcrel_hi1)(a1) + + .section .text.sec_two,"ax" +.Lpcrel_hi0: + auipc a0, %pcrel_hi(a) +.Lpcrel_hi1: + auipc a1, %pcrel_hi(a) + +.Lpcrel_hi2: + auipc a2, %pcrel_hi(a) + addi a2, a2, %pcrel_lo(.Lpcrel_hi2) + + .data + .global a +a: + .word 50 diff --git a/lld/test/ELF/riscv-pcrel-hilo-error.s b/lld/test/ELF/riscv-pcrel-hilo-error.s index 1557ac77bb7ed52..4dc80b5a5e716b0 100644 --- a/lld/test/ELF/riscv-pcrel-hilo-error.s +++ b/lld/test/ELF/riscv-pcrel-hilo-error.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=riscv64 %s -o %t.o # RUN: not ld.lld %t.o --defsym external=0 2>&1 | FileCheck %s -# CHECK: error: R_RISCV_PCREL_LO12 relocation points to an absolute symbol: external +# CHECK: error: {{.*}}:(.text+0x4): R_RISCV_PCREL_LO12 relocation points to an absolute symbol: external # We provide a dummy %pcrel_hi referred to by external to appease the # assembler, but make external weak so --defsym can still override it at link diff --git a/lld/test/MachO/thinlto-emit-imports.ll b/lld/test/MachO/thinlto-emit-imports.ll index 88f766f59c8877f..90ee6a56b93b8f3 100644 --- a/lld/test/MachO/thinlto-emit-imports.ll +++ b/lld/test/MachO/thinlto-emit-imports.ll @@ -33,7 +33,7 @@ ; RUN: chmod 400 %t3.o.imports ; RUN: not %lld --thinlto-index-only --thinlto-emit-imports-files -dylib %t1.o %t2.o %t3.o -o /dev/null 2>&1 \ ; RUN: | FileCheck -DMSG=%errc_EACCES %s --check-prefix=ERR -; ERR: cannot open {{.*}}3.o.imports: [[MSG]] +; ERR: 'cannot open {{.*}}3.o.imports': [[MSG]] ; Ensure lld doesn't generate import files when thinlto-index-only is not enabled ; RUN: rm -f %t1.o.imports diff --git a/lldb/docs/resources/test.rst b/lldb/docs/resources/test.rst index 906e687cb13b131..715d3772fe2787c 100644 --- a/lldb/docs/resources/test.rst +++ b/lldb/docs/resources/test.rst @@ -60,7 +60,8 @@ something like ``target.BreakpointCreateByName`` [#]_. 
A good rule of thumb is to prefer shell tests when what is being tested is relatively simple. Expressivity is limited compared to the API tests, which means that you have to have a well-defined test scenario that you can easily -match with ``FileCheck``. +match with ``FileCheck``. Though Shell tests can be run remotely, behavior +specific to remote debugging must be tested with API tests instead. Another thing to consider are the binaries being debugged, which we call inferiors. For shell tests, they have to be relatively simple. The @@ -592,15 +593,17 @@ test suite, but there are two things to have in mind: multiple connections. For more information on how to setup remote debugging see the Remote debugging page. 2. You must tell the test-suite how to connect to the remote system. This is - achieved using the ``--platform-name``, ``--platform-url`` and - ``--platform-working-dir`` parameters to ``dotest.py``. These parameters - correspond to the platform select and platform connect LLDB commands. You - will usually also need to specify the compiler and architecture for the - remote system. - -Currently, running the remote test suite is supported only with ``dotest.py`` (or -dosep.py with a single thread), but we expect this issue to be addressed in the -near future. + achieved using the ``LLDB_TEST_PLATFORM_URL``, ``LLDB_TEST_PLATFORM_WORKING_DIR`` + flags to cmake, and ``--platform-name`` parameter to ``dotest.py``. + These parameters correspond to the platform select and platform connect + LLDB commands. You will usually also need to specify the compiler and + architecture for the remote system. +3. Remote Shell tests execution is currently supported only for Linux target + platform. It's triggered when ``LLDB_TEST_SYSROOT`` is provided for building + test sources. It can be disabled by setting ``LLDB_TEST_SHELL_DISABLE_REMOTE=On``. + Shell tests are not guaranteed to pass against remote target if the compiler + being used is other than Clang. 
+ Running tests in QEMU System Emulation Environment `````````````````````````````````````````````````` diff --git a/lldb/docs/use/tutorial.rst b/lldb/docs/use/tutorial.rst index 76e8ac4aeab89cf..7bfe1743b917f69 100644 --- a/lldb/docs/use/tutorial.rst +++ b/lldb/docs/use/tutorial.rst @@ -210,7 +210,7 @@ you will have to indicate these are not option markers by putting ``--`` after t command name, but before your command string. LLDB also has a built-in Python interpreter, which is accessible by the -``"script`` command. All the functionality of the debugger is available as classes +``script`` command. All the functionality of the debugger is available as classes in the Python interpreter, so the more complex commands that in GDB you would introduce with the ``define`` command can be done by writing Python functions using the LLDB Python library, then loading the scripts into your running @@ -331,8 +331,8 @@ do: :: - (lldb) help -- Breakpoint ID's consist major and - minor numbers; the major etc... + (lldb) help + -- Breakpoint ID's consist major and minor numbers; the major etc... Breakpoint Names ---------------- diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp index baf70cbccc8c5c7..7bee183d2ff2278 100644 --- a/lldb/source/Expression/IRExecutionUnit.cpp +++ b/lldb/source/Expression/IRExecutionUnit.cpp @@ -780,6 +780,10 @@ IRExecutionUnit::FindInSymbols(const std::vector &names, return LLDB_INVALID_ADDRESS; } + ModuleList non_local_images = target->GetImages(); + // We'll process module_sp separately, before the other modules. 
+ non_local_images.Remove(sc.module_sp); + LoadAddressResolver resolver(target, symbol_was_missing_weak); ModuleFunctionSearchOptions function_options; @@ -787,6 +791,11 @@ IRExecutionUnit::FindInSymbols(const std::vector &names, function_options.include_inlines = false; for (const ConstString &name : names) { + // The lookup order here is as follows: + // 1) Functions in `sc.module_sp` + // 2) Functions in the other modules + // 3) Symbols in `sc.module_sp` + // 4) Symbols in the other modules if (sc.module_sp) { SymbolContextList sc_list; sc.module_sp->FindFunctions(name, CompilerDeclContext(), @@ -796,18 +805,26 @@ IRExecutionUnit::FindInSymbols(const std::vector &names, return *load_addr; } - if (sc.target_sp) { + { + SymbolContextList sc_list; + non_local_images.FindFunctions(name, lldb::eFunctionNameTypeFull, + function_options, sc_list); + if (auto load_addr = resolver.Resolve(sc_list)) + return *load_addr; + } + + if (sc.module_sp) { SymbolContextList sc_list; - sc.target_sp->GetImages().FindFunctions(name, lldb::eFunctionNameTypeFull, - function_options, sc_list); + sc.module_sp->FindSymbolsWithNameAndType(name, lldb::eSymbolTypeAny, + sc_list); if (auto load_addr = resolver.Resolve(sc_list)) return *load_addr; } - if (sc.target_sp) { + { SymbolContextList sc_list; - sc.target_sp->GetImages().FindSymbolsWithNameAndType( - name, lldb::eSymbolTypeAny, sc_list); + non_local_images.FindSymbolsWithNameAndType(name, lldb::eSymbolTypeAny, + sc_list); if (auto load_addr = resolver.Resolve(sc_list)) return *load_addr; } diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp index 51e4b3e6728f238..b9c0e174c3be688 100644 --- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp +++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp @@ -108,21 +108,6 @@ void DynamicLoaderPOSIXDYLD::DidAttach() { // if we dont have a load 
address we cant re-base bool rebase_exec = load_offset != LLDB_INVALID_ADDRESS; - // if we have a valid executable - if (executable_sp.get()) { - lldb_private::ObjectFile *obj = executable_sp->GetObjectFile(); - if (obj) { - // don't rebase if the module already has a load address - Target &target = m_process->GetTarget(); - Address addr = obj->GetImageInfoAddress(&target); - if (addr.GetLoadAddress(&target) != LLDB_INVALID_ADDRESS) - rebase_exec = false; - } - } else { - // no executable, nothing to re-base - rebase_exec = false; - } - // if the target executable should be re-based if (rebase_exec) { ModuleList module_list; diff --git a/lldb/test/API/functionalities/archives/TestBSDArchives.py b/lldb/test/API/functionalities/archives/TestBSDArchives.py index 1bef8e896e0be70..88acafdd9e22aad 100644 --- a/lldb/test/API/functionalities/archives/TestBSDArchives.py +++ b/lldb/test/API/functionalities/archives/TestBSDArchives.py @@ -21,11 +21,6 @@ def setUp(self): # Find the line number in a(int) to break at. self.line = line_number("a.c", "// Set file and line breakpoint inside a().") - @expectedFailureAll( - oslist=["windows"], - bugnumber="llvm.org/pr24527. Makefile.rules doesn't know how to build static libs on Windows", - ) - @expectedFailureAll(remote=True) def test(self): """Break inside a() and b() defined within libfoo.a.""" self.build() @@ -126,10 +121,6 @@ def test_frame_var_errors_when_archive_missing(self): @skipIfRemote @skipIf(compiler="clang", compiler_version=["<", "12.0"]) - @expectedFailureAll( - oslist=["windows"], - bugnumber="llvm.org/pr24527. 
Makefile.rules doesn't know how to build static libs on Windows", - ) def test_archive_specifications(self): """ Create archives and make sure the information we get when retrieving diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py index 20822c4deada0f7..7167f4c6c9b5b1c 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py @@ -46,7 +46,9 @@ def _run_test(self, defines): for r in range(2): name = f"test_r{r}" defines = [f"REVISION={r}"] - f = functools.partialmethod( - LibcxxOptionalDataFormatterSimulatorTestCase._run_test, defines - ) - setattr(LibcxxOptionalDataFormatterSimulatorTestCase, name, f) + + @functools.wraps(LibcxxOptionalDataFormatterSimulatorTestCase._run_test) + def test_method(self, defines=defines): + LibcxxOptionalDataFormatterSimulatorTestCase._run_test(self, defines) + + setattr(LibcxxOptionalDataFormatterSimulatorTestCase, name, test_method) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/main.cpp index 94f3c75d395f865..0478454eb613717 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/main.cpp +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/main.cpp @@ -99,6 +99,5 @@ class optional : private __optional_move_assign_base<_Tp>, int main() { std::__lldb::optional maybe_string{"Hello"}; std::__lldb::optional maybe_int{42}; - 
__builtin_printf("Break here\n"); - return 0; + return 0; // Break here } diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/string/TestDataFormatterLibcxxStringSimulator.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/string/TestDataFormatterLibcxxStringSimulator.py index fff181440b6d7c5..c996fff4fb7aab4 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/string/TestDataFormatterLibcxxStringSimulator.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/string/TestDataFormatterLibcxxStringSimulator.py @@ -34,7 +34,9 @@ def _run_test(self, defines): if v: name += "_" + v defines += [v] - f = functools.partialmethod( - LibcxxStringDataFormatterSimulatorTestCase._run_test, defines - ) - setattr(LibcxxStringDataFormatterSimulatorTestCase, name, f) + + @functools.wraps(LibcxxStringDataFormatterSimulatorTestCase._run_test) + def test_method(self, defines=defines): + LibcxxStringDataFormatterSimulatorTestCase._run_test(self, defines) + + setattr(LibcxxStringDataFormatterSimulatorTestCase, name, test_method) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/unique_ptr/TestDataFormatterLibcxxUniquePtrSimulator.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/unique_ptr/TestDataFormatterLibcxxUniquePtrSimulator.py index 0026eca8eebeaec..4ee97e10de84cd1 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/unique_ptr/TestDataFormatterLibcxxUniquePtrSimulator.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/unique_ptr/TestDataFormatterLibcxxUniquePtrSimulator.py @@ -29,7 +29,9 @@ def _run_test(self, defines): for r in range(3): name = "test_r%d" % r defines = ["COMPRESSED_PAIR_REV=%d" % r] - f = functools.partialmethod( - 
LibcxxUniquePtrDataFormatterSimulatorTestCase._run_test, defines - ) - setattr(LibcxxUniquePtrDataFormatterSimulatorTestCase, name, f) + + @functools.wraps(LibcxxUniquePtrDataFormatterSimulatorTestCase._run_test) + def test_method(self, defines=defines): + LibcxxUniquePtrDataFormatterSimulatorTestCase._run_test(self, defines) + + setattr(LibcxxUniquePtrDataFormatterSimulatorTestCase, name, test_method) diff --git a/lldb/test/API/lang/c/shared_lib_stripped_symbols/TestSharedLibStrippedSymbols.py b/lldb/test/API/lang/c/shared_lib_stripped_symbols/TestSharedLibStrippedSymbols.py index 6971fc0fbc3fde3..55857d703978c60 100644 --- a/lldb/test/API/lang/c/shared_lib_stripped_symbols/TestSharedLibStrippedSymbols.py +++ b/lldb/test/API/lang/c/shared_lib_stripped_symbols/TestSharedLibStrippedSymbols.py @@ -11,7 +11,7 @@ class SharedLibStrippedTestCase(TestBase): @expectedFailureAll(oslist=["windows"]) def test_expr(self): - """Test that types work when defined in a shared library and forwa/d-declared in the main executable""" + """Test that types work when defined in a shared library and forward-declared in the main executable""" if "clang" in self.getCompiler() and "3.4" in self.getCompilerVersion(): self.skipTest( "llvm.org/pr16214 -- clang emits partial DWARF for structures referenced via typedef" diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index 96520c7c826246a..6a0a1b0a766755e 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -303,6 +303,13 @@ def delete_module_cache(path): # In particular, (1) is visited at the top of the file, since the script # derives other information from it. 
+if is_configured("lldb_platform_url"): + dotest_cmd += ["--platform-url", config.lldb_platform_url] +if is_configured("lldb_platform_working_dir"): + dotest_cmd += ["--platform-working-dir", config.lldb_platform_working_dir] +if is_configured("cmake_sysroot"): + dotest_cmd += ["--sysroot", config.cmake_sysroot] + if is_configured("dotest_user_args_str"): dotest_cmd.extend(config.dotest_user_args_str.split(";")) diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index 8b2d09ae41cd2ac..db3cd2971f347af 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -24,6 +24,9 @@ config.lua_executable = "@Lua_EXECUTABLE@" config.lua_test_entry = "TestLuaAPI.py" config.dotest_common_args_str = lit_config.substitute("@LLDB_TEST_COMMON_ARGS@") config.dotest_user_args_str = lit_config.substitute("@LLDB_TEST_USER_ARGS@") +config.lldb_platform_url = lit_config.substitute("@LLDB_TEST_PLATFORM_URL@") +config.lldb_platform_working_dir = lit_config.substitute("@LLDB_TEST_PLATFORM_WORKING_DIR@") +config.cmake_sysroot = lit_config.substitute("@LLDB_TEST_SYSROOT@" or "@DEFAULT_SYSROOT@") config.lldb_enable_python = @LLDB_ENABLE_PYTHON@ config.dotest_lit_args_str = None config.enabled_plugins = [] diff --git a/lldb/test/API/macosx/duplicate-archive-members/TestDuplicateMembers.py b/lldb/test/API/macosx/duplicate-archive-members/TestDuplicateMembers.py index 1746b2d3a47b19f..1a0e60e53b1a5ba 100644 --- a/lldb/test/API/macosx/duplicate-archive-members/TestDuplicateMembers.py +++ b/lldb/test/API/macosx/duplicate-archive-members/TestDuplicateMembers.py @@ -8,10 +8,6 @@ class BSDArchivesTestCase(TestBase): - @expectedFailureAll( - oslist=["windows"], - bugnumber="llvm.org/pr24527. 
Makefile.rules doesn't know how to build static libs on Windows", - ) def test(self): """Break inside a() and b() defined within libfoo.a.""" self.build() diff --git a/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py b/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py index b85b6048439639a..5189435185607f8 100644 --- a/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py +++ b/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py @@ -58,7 +58,7 @@ def test_core_file(self): self.assertEqual(self.get_stackFrames(), expected_frames) @skipIfLLVMTargetMissing("X86") - def test_core_file_source_mapping(self): + def test_core_file_source_mapping_array(self): """Test that sourceMap property is correctly applied when loading a core""" current_dir = os.path.dirname(__file__) exe_file = os.path.join(current_dir, "linux-x86_64.out") @@ -70,3 +70,17 @@ def test_core_file_source_mapping(self): self.attach(exe_file, coreFile=core_file, sourceMap=source_map) self.assertIn(current_dir, self.get_stackFrames()[0]["source"]["path"]) + + @skipIfLLVMTargetMissing("X86") + def test_core_file_source_mapping_object(self): + """Test that sourceMap property is correctly applied when loading a core""" + current_dir = os.path.dirname(__file__) + exe_file = os.path.join(current_dir, "linux-x86_64.out") + core_file = os.path.join(current_dir, "linux-x86_64.core") + + self.create_debug_adaptor() + + source_map = {"/home/labath/test": current_dir} + self.attach(exe_file, coreFile=core_file, sourceMap=source_map) + + self.assertIn(current_dir, self.get_stackFrames()[0]["source"]["path"]) diff --git a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py index a16f2da3c4df716..7898d01457afc4d 100644 --- a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py +++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py @@ -224,12 +224,47 @@ def test_args(self): 'arg[%i] "%s" not in "%s"' % (i + 1, quoted_arg, lines[i]), 
) - def test_environment(self): + def test_environment_with_object(self): + """ + Tests launch of a simple program with environment variables + """ + program = self.getBuildArtifact("a.out") + env = { + "NO_VALUE": "", + "WITH_VALUE": "BAR", + "EMPTY_VALUE": "", + "SPACE": "Hello World", + } + + self.build_and_launch(program, env=env) + self.continue_to_exit() + + # Now get the STDOUT and verify our arguments got passed correctly + output = self.get_stdout() + self.assertTrue(output and len(output) > 0, "expect program output") + lines = output.splitlines() + # Skip the all arguments so we have only environment vars left + while len(lines) and lines[0].startswith("arg["): + lines.pop(0) + # Make sure each environment variable in "env" is actually set in the + # program environment that was printed to STDOUT + for var in env: + found = False + for program_var in lines: + if var in program_var: + found = True + break + self.assertTrue( + found, '"%s" must exist in program environment (%s)' % (var, lines) + ) + + def test_environment_with_array(self): """ Tests launch of a simple program with environment variables """ program = self.getBuildArtifact("a.out") env = ["NO_VALUE", "WITH_VALUE=BAR", "EMPTY_VALUE=", "SPACE=Hello World"] + self.build_and_launch(program, env=env) self.continue_to_exit() diff --git a/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py b/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py index b214b512c0de341..e866787c4d9d534 100644 --- a/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py +++ b/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py @@ -90,6 +90,28 @@ def test_runInTerminal(self): env = self.dap_server.request_evaluate("foo")["body"]["result"] self.assertIn("bar", env) + def test_runInTerminalWithObjectEnv(self): + if not self.isTestSupported(): + return + """ + Tests the "runInTerminal" reverse request. 
It makes sure that the IDE can + launch the inferior with the correct environment variables using an object. + """ + program = self.getBuildArtifact("a.out") + self.build_and_launch(program, runInTerminal=True, env={"FOO": "BAR"}) + + self.assertEqual( + len(self.dap_server.reverse_requests), + 1, + "make sure we got a reverse request", + ) + + request = self.dap_server.reverse_requests[0] + request_envs = request["arguments"]["env"] + + self.assertIn("FOO", request_envs) + self.assertEqual("BAR", request_envs["FOO"]) + @skipIfWindows @skipIf(archs=no_match(["x86_64"])) def test_runInTerminalInvalidTarget(self): diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index 5ac474736eb63dd..25037934f636c73 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -235,6 +235,8 @@ if (CMAKE_SIZEOF_VOID_P EQUAL 8) set(LLDB_IS_64_BITS 1) endif() +set(LLDB_TEST_SHELL_DISABLE_REMOTE OFF CACHE BOOL "Disable remote Shell tests execution") + # These values are not canonicalized within LLVM. 
llvm_canonicalize_cmake_booleans( LLDB_BUILD_INTEL_PT @@ -244,6 +246,7 @@ llvm_canonicalize_cmake_booleans( LLVM_ENABLE_ZLIB LLVM_ENABLE_SHARED_LIBS LLDB_HAS_LIBCXX + LLDB_TEST_SHELL_DISABLE_REMOTE LLDB_TOOL_LLDB_SERVER_BUILD LLDB_USE_SYSTEM_DEBUGSERVER LLDB_IS_64_BITS) diff --git a/lldb/test/Shell/Expr/TestIRMemoryMap.test b/lldb/test/Shell/Expr/TestIRMemoryMap.test index 9dd0413be14cf0a..9cba0624c926889 100644 --- a/lldb/test/Shell/Expr/TestIRMemoryMap.test +++ b/lldb/test/Shell/Expr/TestIRMemoryMap.test @@ -1,4 +1,4 @@ -# UNSUPPORTED: system-windows +# UNSUPPORTED: system-windows, remote-linux # RUN: %clangxx_host %p/Inputs/call-function.cpp -g -o %t diff --git a/lldb/test/Shell/Settings/TestEchoCommands.test b/lldb/test/Shell/Settings/TestEchoCommands.test index 234b9742bfa2aad..3488e707321863a 100644 --- a/lldb/test/Shell/Settings/TestEchoCommands.test +++ b/lldb/test/Shell/Settings/TestEchoCommands.test @@ -1,8 +1,3 @@ # RUN: %lldb -x -b -o 'settings set interpreter.echo-comment-commands true' -s %S/Inputs/EchoCommandsTest.in | FileCheck %S/Inputs/EchoCommandsAll.out # RUN: %lldb -x -b -o 'settings set interpreter.echo-comment-commands false' -s %S/Inputs/EchoCommandsTest.in | FileCheck %S/Inputs/EchoCommandsNoComments.out # RUN: %lldb -x -b -o 'settings set interpreter.echo-commands false' -s %S/Inputs/EchoCommandsTest.in | FileCheck %S/Inputs/EchoCommandsNone.out - -RUN: echo start >%t.file -RUN: %lldb -x -b --source-quietly -s %S/Inputs/EchoCommandsTest.in >>%t.file -RUN: echo done >>%t.file -RUN: FileCheck %S/Inputs/EchoCommandsQuiet.out <%t.file diff --git a/lldb/test/Shell/Settings/TestEchoCommandsQuiet.test b/lldb/test/Shell/Settings/TestEchoCommandsQuiet.test new file mode 100644 index 000000000000000..88c4f598e697481 --- /dev/null +++ b/lldb/test/Shell/Settings/TestEchoCommandsQuiet.test @@ -0,0 +1,6 @@ +Can't mute platform select command. 
+UNSUPPORTED: remote-linux +RUN: echo start >%t.file +RUN: %lldb -x -b --source-quietly -s %S/Inputs/EchoCommandsTest.in >>%t.file +RUN: echo done >>%t.file +RUN: FileCheck %S/Inputs/EchoCommandsQuiet.out <%t.file diff --git a/lldb/test/Shell/Target/target-label.test b/lldb/test/Shell/Target/target-label.test index 5ac430601e29ac5..7f4f31e09fa1648 100644 --- a/lldb/test/Shell/Target/target-label.test +++ b/lldb/test/Shell/Target/target-label.test @@ -4,16 +4,16 @@ target create -l "ls" /bin/ls target list -# CHECK: * target #0 (ls): /bin/ls +# CHECK: * target #0 (ls): [[LS_PATH:.*]] script lldb.target.SetLabel("") target list -# CHECK: * target #0: /bin/ls +# CHECK: * target #0: [[LS_PATH]] target create -l "cat" /bin/cat target list -# CHECK: target #0: /bin/ls -# CHECK-NEXT: * target #1 (cat): /bin/cat +# CHECK: target #0: [[LS_PATH]] +# CHECK-NEXT: * target #1 (cat): [[CAT_PATH:.*]] target create -l "cat" /bin/cat # CHECK: Cannot use label 'cat' since it's set in target #1. @@ -22,12 +22,12 @@ target create -l 42 /bin/cat # CHECK: error: Cannot use integer as target label. 
target select 0 -# CHECK: * target #0: /bin/ls -# CHECK-NEXT: target #1 (cat): /bin/cat +# CHECK: * target #0: [[LS_PATH]] +# CHECK-NEXT: target #1 (cat): [[CAT_PATH]] target select cat -# CHECK: target #0: /bin/ls -# CHECK-NEXT: * target #1 (cat): /bin/cat +# CHECK: target #0: [[LS_PATH]] +# CHECK-NEXT: * target #1 (cat): [[CAT_PATH]] script lldb.target.GetLabel() # CHECK: 'cat' @@ -36,5 +36,5 @@ script lldb.debugger.GetTargetAtIndex(0).SetLabel('Not cat') # CHECK: success target list -# CHECK: target #0 (Not cat): /bin/ls -# CHECK-NEXT: * target #1 (cat): /bin/cat +# CHECK: target #0 (Not cat): [[LS_PATH]] +# CHECK-NEXT: * target #1 (cat): [[CAT_PATH]] diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py index 255955fc70d8c41..81ae490f6a7dc48 100644 --- a/lldb/test/Shell/helper/toolchain.py +++ b/lldb/test/Shell/helper/toolchain.py @@ -1,14 +1,17 @@ import os import itertools import platform +import re import subprocess import sys import lit.util +from lit.formats import ShTest from lit.llvm import llvm_config from lit.llvm.subst import FindTool from lit.llvm.subst import ToolSubst +import posixpath def _get_lldb_init_path(config): return os.path.join(config.test_exec_root, "lit-lldb-init-quiet") @@ -22,6 +25,68 @@ def _disallow(config, execName): config.substitutions.append((" {0} ".format(execName), warning.format(execName))) +def get_lldb_args(config, suffix=""): + lldb_args = [] + if "remote-linux" in config.available_features: + lldb_args += [ + "-O", + '"platform select remote-linux"', + "-O", + f'"platform connect {config.lldb_platform_url}"', + ] + if config.lldb_platform_working_dir: + dir = posixpath.join(f"{config.lldb_platform_working_dir}", "shell") + if suffix: + dir += posixpath.join(dir, f"{suffix}") + lldb_args += [ + "-O", + f'"platform shell mkdir -p {dir}"', + "-O", + f'"platform settings -w {dir}"', + ] + lldb_args += ["--no-lldbinit", "-S", _get_lldb_init_path(config)] + return lldb_args + + +class 
ShTestLldb(ShTest): + def __init__( + self, execute_external=False, extra_substitutions=[], preamble_commands=[] + ): + super().__init__(execute_external, extra_substitutions, preamble_commands) + + def execute(self, test, litConfig): + # Run each Shell test in a separate directory (on remote). + + # Find directory change command in %lldb substitution. + for i, t in enumerate(test.config.substitutions): + if re.match(t[0], "%lldb"): + cmd = t[1] + if '-O "platform settings -w ' in cmd: + # If command is present, it is added by get_lldb_args. + # Replace the path with the tests' path in suite. + # Example: + # bin/lldb + # -O "platform shell mkdir -p /home/user/shell" + # -O "platform settings -w /home/user/shell" ... + # => + # bin/lldb + # -O "platform shell mkdir -p /home/user/shell/SymbolFile/Breakpad/inline-record.test" + # -O "platform settings -w /home/user/shell/SymbolFile/Breakpad/inline-record.test" ... + args_def = " ".join(get_lldb_args(test.config)) + args_unique = " ".join( + get_lldb_args( + test.config, + posixpath.join(*test.path_in_suite), + ) + ) + test.config.substitutions[i] = ( + t[0], + cmd.replace(args_def, args_unique), + ) + break + return super().execute(test, litConfig) + + def use_lldb_substitutions(config): # Set up substitutions for primary tools. These tools must come from config.lldb_tools_dir # which is basically the build output directory. 
We do not want to find these in path or @@ -34,7 +99,9 @@ def use_lldb_substitutions(config): build_script = os.path.join(build_script, "build.py") build_script_args = [ build_script, - "--compiler=any", # Default to best compiler + ( + "--compiler=clang" if config.enable_remote else "--compiler=any" + ), # Default to best compiler "--arch=" + str(config.lldb_bitness), ] if config.lldb_lit_tools_dir: @@ -56,7 +123,7 @@ def use_lldb_substitutions(config): ToolSubst( "%lldb", command=FindTool("lldb"), - extra_args=["--no-lldbinit", "-S", lldb_init], + extra_args=get_lldb_args(config), unresolved="fatal", ), ToolSubst( @@ -138,7 +205,10 @@ def use_support_substitutions(config): # Set up substitutions for support tools. These tools can be overridden at the CMake # level (by specifying -DLLDB_LIT_TOOLS_DIR), installed, or as a last resort, we can use # the just-built version. - host_flags = ["--target=" + config.host_triple] + if config.enable_remote: + host_flags = ["--target=" + config.target_triple] + else: + host_flags = ["--target=" + config.host_triple] if platform.system() in ["Darwin"]: try: out = subprocess.check_output(["xcrun", "--show-sdk-path"]).strip() @@ -165,6 +235,12 @@ def use_support_substitutions(config): if config.cmake_sysroot: host_flags += ["--sysroot={}".format(config.cmake_sysroot)] + if config.enable_remote and config.has_libcxx: + host_flags += [ + "-L{}".format(config.libcxx_libs_dir), + "-lc++", + ] + host_flags = " ".join(host_flags) config.substitutions.append(("%clang_host", "%clang " + host_flags)) config.substitutions.append(("%clangxx_host", "%clangxx " + host_flags)) diff --git a/lldb/test/Shell/lit.cfg.py b/lldb/test/Shell/lit.cfg.py index d764cfa20ea8379..9a7b71889f8dff0 100644 --- a/lldb/test/Shell/lit.cfg.py +++ b/lldb/test/Shell/lit.cfg.py @@ -21,7 +21,7 @@ config.name = "lldb-shell" # testFormat: The test format to use to interpret tests. 
-config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) +config.test_format = toolchain.ShTestLldb(not llvm_config.use_lit_shell) # suffixes: A list of file extensions to treat as test files. This is overriden # by individual lit.local.cfg files in the test subdirectories. @@ -68,6 +68,13 @@ lit_config.note("Running Shell tests in {} mode.".format(lldb_repro_mode)) toolchain.use_lldb_repro_substitutions(config, lldb_repro_mode) +if config.lldb_platform_url and config.cmake_sysroot and config.enable_remote: + if re.match(r".*-linux.*", config.target_triple): + config.available_features.add("remote-linux") +else: + # After this, enable_remote == True iff remote testing is going to be used. + config.enable_remote = False + llvm_config.use_default_substitutions() toolchain.use_lldb_substitutions(config) toolchain.use_support_substitutions(config) diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in index b69e7bce1bc0be6..31a6d68618b770d 100644 --- a/lldb/test/Shell/lit.site.cfg.py.in +++ b/lldb/test/Shell/lit.site.cfg.py.in @@ -10,10 +10,15 @@ config.lldb_src_root = "@LLDB_SOURCE_DIR@" config.lldb_obj_root = "@LLDB_BINARY_DIR@" config.lldb_libs_dir = lit_config.substitute("@LLDB_LIBS_DIR@") config.lldb_tools_dir = lit_config.substitute("@LLDB_TOOLS_DIR@") +config.lldb_platform_url = lit_config.substitute("@LLDB_TEST_PLATFORM_URL@") +config.lldb_platform_working_dir = lit_config.substitute("@LLDB_TEST_PLATFORM_WORKING_DIR@") # Since it comes from the command line, it may have backslashes which # should not need to be escaped. 
config.lldb_lit_tools_dir = lit_config.substitute(r"@LLDB_LIT_TOOLS_DIR@") -config.cmake_sysroot = lit_config.substitute("@CMAKE_SYSROOT@") +config.cmake_sysroot = lit_config.substitute("@LLDB_TEST_SYSROOT@" or "@DEFAULT_SYSROOT@") +config.has_libcxx = @LLDB_HAS_LIBCXX@ +config.enable_remote = not @LLDB_TEST_SHELL_DISABLE_REMOTE@ +config.libcxx_libs_dir = "@LIBCXX_LIBRARY_DIR@" config.target_triple = "@LLVM_TARGET_TRIPLE@" config.python_executable = "@Python3_EXECUTABLE@" config.have_zlib = @LLVM_ENABLE_ZLIB@ diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp index 4f9c9c01cf4b6bb..558f889c4b7f232 100644 --- a/lldb/tools/lldb-dap/JSONUtils.cpp +++ b/lldb/tools/lldb-dap/JSONUtils.cpp @@ -152,6 +152,31 @@ std::vector GetStrings(const llvm::json::Object *obj, return strs; } +std::unordered_map +GetStringMap(const llvm::json::Object &obj, llvm::StringRef key) { + std::unordered_map strs; + const auto *const json_object = obj.getObject(key); + if (!json_object) + return strs; + + for (const auto &[key, value] : *json_object) { + switch (value.kind()) { + case llvm::json::Value::String: + strs.emplace(key.str(), value.getAsString()->str()); + break; + case llvm::json::Value::Number: + case llvm::json::Value::Boolean: + strs.emplace(key.str(), llvm::to_string(value)); + break; + case llvm::json::Value::Null: + case llvm::json::Value::Object: + case llvm::json::Value::Array: + break; + } + } + return strs; +} + static bool IsClassStructOrUnionType(lldb::SBType t) { return (t.GetTypeClass() & (lldb::eTypeClassUnion | lldb::eTypeClassStruct | lldb::eTypeClassArray)) != 0; @@ -1439,16 +1464,22 @@ CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request, if (!cwd.empty()) run_in_terminal_args.try_emplace("cwd", cwd); - // We need to convert the input list of environments variables into a - // dictionary - std::vector envs = GetStrings(launch_request_arguments, "env"); - llvm::json::Object environment; - for (const std::string 
&env : envs) { - size_t index = env.find('='); - environment.try_emplace(env.substr(0, index), env.substr(index + 1)); + auto envs = GetEnvironmentFromArguments(*launch_request_arguments); + llvm::json::Object env_json; + for (size_t index = 0, env_count = envs.GetNumValues(); index < env_count; + index++) { + llvm::StringRef key = envs.GetNameAtIndex(index); + llvm::StringRef value = envs.GetValueAtIndex(index); + + if (key.empty()) + g_dap.SendOutput(OutputType::Stderr, + "empty environment variable for value: \"" + + value.str() + '\"'); + else + env_json.try_emplace(key, value); } run_in_terminal_args.try_emplace("env", - llvm::json::Value(std::move(environment))); + llvm::json::Value(std::move(env_json))); return run_in_terminal_args; } diff --git a/lldb/tools/lldb-dap/JSONUtils.h b/lldb/tools/lldb-dap/JSONUtils.h index 13018458ffe0ad9..18cfb4081fece18 100644 --- a/lldb/tools/lldb-dap/JSONUtils.h +++ b/lldb/tools/lldb-dap/JSONUtils.h @@ -16,6 +16,7 @@ #include "llvm/Support/JSON.h" #include #include +#include namespace lldb_dap { @@ -159,6 +160,27 @@ DecodeMemoryReference(llvm::StringRef memoryReference); std::vector GetStrings(const llvm::json::Object *obj, llvm::StringRef key); +/// Extract an object of key value strings for the specified key from an object. +/// +/// String values in the object will be extracted without any quotes +/// around them. Numbers and Booleans will be converted into +/// strings. Any NULL, array or objects values in the array will be +/// ignored. +/// +/// \param[in] obj +/// A JSON object that we will attempt to extract the array from +/// +/// \param[in] key +/// The key to use when extracting the value +/// +/// \return +/// An object of key value strings for the specified \a key, or +/// \a fail_value if there is no key that matches or if the +/// value is not an object or key and values in the object are not +/// strings, numbers or booleans. 
+std::unordered_map +GetStringMap(const llvm::json::Object &obj, llvm::StringRef key); + /// Fill a response object given the request object. /// /// The \a response object will get its "type" set to "response", diff --git a/lldb/tools/lldb-dap/LLDBUtils.cpp b/lldb/tools/lldb-dap/LLDBUtils.cpp index a74b32609a167b7..b38833c0fdb6b6f 100644 --- a/lldb/tools/lldb-dap/LLDBUtils.cpp +++ b/lldb/tools/lldb-dap/LLDBUtils.cpp @@ -135,4 +135,29 @@ int64_t MakeDAPFrameID(lldb::SBFrame &frame) { frame.GetFrameID(); } +lldb::SBEnvironment +GetEnvironmentFromArguments(const llvm::json::Object &arguments) { + lldb::SBEnvironment envs{}; + constexpr llvm::StringRef env_key = "env"; + const llvm::json::Value *raw_json_env = arguments.get(env_key); + + if (!raw_json_env) + return envs; + + if (raw_json_env->kind() == llvm::json::Value::Object) { + auto env_map = GetStringMap(arguments, env_key); + for (const auto &[key, value] : env_map) + envs.Set(key.c_str(), value.c_str(), true); + + } else if (raw_json_env->kind() == llvm::json::Value::Array) { + const auto envs_strings = GetStrings(&arguments, env_key); + lldb::SBStringList entries{}; + for (const auto &env : envs_strings) + entries.AppendString(env.c_str()); + + envs.SetEntries(entries, true); + } + return envs; +} + } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/LLDBUtils.h b/lldb/tools/lldb-dap/LLDBUtils.h index ee701da2230fe00..d5072d19029a1e5 100644 --- a/lldb/tools/lldb-dap/LLDBUtils.h +++ b/lldb/tools/lldb-dap/LLDBUtils.h @@ -10,11 +10,12 @@ #define LLDB_TOOLS_LLDB_DAP_LLDBUTILS_H #include "DAPForward.h" +#include "lldb/API/SBEnvironment.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" #include -#include namespace lldb_dap { @@ -135,6 +136,17 @@ uint32_t GetLLDBThreadIndexID(uint64_t dap_frame_id); /// The LLDB frame index ID. 
uint32_t GetLLDBFrameID(uint64_t dap_frame_id); +/// Gets all the environment variables from the json object depending on if the +/// kind is an object or an array. +/// +/// \param[in] arguments +/// The json object with the launch options +/// +/// \return +/// The environment variables stored in the env key +lldb::SBEnvironment +GetEnvironmentFromArguments(const llvm::json::Object &arguments); + } // namespace lldb_dap #endif diff --git a/lldb/tools/lldb-dap/README.md b/lldb/tools/lldb-dap/README.md index 7248df310c14a9d..3a7d82e887cca3f 100644 --- a/lldb/tools/lldb-dap/README.md +++ b/lldb/tools/lldb-dap/README.md @@ -36,7 +36,10 @@ adds `FOO=1` and `bar` to the environment: "name": "Debug", "program": "/tmp/a.out", "args": [ "one", "two", "three" ], - "env": [ "FOO=1", "BAR" ], + "env": { + "FOO": "1" + "BAR": "" + } } ``` diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index 0b6d6402410654a..ac18e8f24a4e39a 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -53,6 +53,7 @@ #include #include +#include "lldb/API/SBEnvironment.h" #include "lldb/API/SBStream.h" #include "lldb/Host/Config.h" #include "llvm/ADT/ArrayRef.h" @@ -610,25 +611,32 @@ void SetSourceMapFromArguments(const llvm::json::Object &arguments) { std::string sourceMapCommand; llvm::raw_string_ostream strm(sourceMapCommand); strm << "settings set target.source-map "; - auto sourcePath = GetString(arguments, "sourcePath"); + const auto sourcePath = GetString(arguments, "sourcePath"); // sourceMap is the new, more general form of sourcePath and overrides it. 
- auto sourceMap = arguments.getArray("sourceMap"); - if (sourceMap) { - for (const auto &value : *sourceMap) { - auto mapping = value.getAsArray(); + constexpr llvm::StringRef sourceMapKey = "sourceMap"; + + if (const auto *sourceMapArray = arguments.getArray(sourceMapKey)) { + for (const auto &value : *sourceMapArray) { + const auto *mapping = value.getAsArray(); if (mapping == nullptr || mapping->size() != 2 || (*mapping)[0].kind() != llvm::json::Value::String || (*mapping)[1].kind() != llvm::json::Value::String) { g_dap.SendOutput(OutputType::Console, llvm::StringRef(sourceMapHelp)); return; } - auto mapFrom = GetAsString((*mapping)[0]); - auto mapTo = GetAsString((*mapping)[1]); + const auto mapFrom = GetAsString((*mapping)[0]); + const auto mapTo = GetAsString((*mapping)[1]); strm << "\"" << mapFrom << "\" \"" << mapTo << "\" "; } + } else if (const auto *sourceMapObj = arguments.getObject(sourceMapKey)) { + for (const auto &[key, value] : *sourceMapObj) { + if (value.kind() == llvm::json::Value::String) { + strm << "\"" << key.str() << "\" \"" << GetAsString(value) << "\" "; + } + } } else { - if (ObjectContainsKey(arguments, "sourceMap")) { + if (ObjectContainsKey(arguments, sourceMapKey)) { g_dap.SendOutput(OutputType::Console, llvm::StringRef(sourceMapHelp)); return; } @@ -2069,9 +2077,8 @@ lldb::SBError LaunchProcess(const llvm::json::Object &request) { launch_info.SetArguments(MakeArgv(args).data(), true); // Pass any environment variables along that the user specified. 
- auto envs = GetStrings(arguments, "env"); - if (!envs.empty()) - launch_info.SetEnvironmentEntries(MakeArgv(envs).data(), true); + const auto envs = GetEnvironmentFromArguments(*arguments); + launch_info.SetEnvironment(envs, true); auto flags = launch_info.GetLaunchFlags(); diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json index 480929703e4b56b..9155163c65ba5ce 100644 --- a/lldb/tools/lldb-dap/package.json +++ b/lldb/tools/lldb-dap/package.json @@ -170,9 +170,27 @@ "default": "${workspaceRoot}" }, "env": { - "type": "array", - "description": "Additional environment variables to set when launching the program. This is an array of strings that contains the variable name followed by an optional '=' character and the environment variable's value.", - "default": [] + "anyOf": [ + { + "type": "object", + "description": "Additional environment variables to set when launching the program. E.g. `{ \"FOO\": \"1\" }`", + "patternProperties": { + ".*": { + "type": "string" + } + }, + "default": {} + }, + { + "type": "array", + "description": "Additional environment variables to set when launching the program. E.g. `[\"FOO=1\", \"BAR\"]`", + "items": { + "type": "string", + "pattern": "^((\\w+=.*)|^\\w+)$" + }, + "default": [] + } + ] }, "stopOnEntry": { "type": "boolean", @@ -204,9 +222,31 @@ "description": "Specify a source path to remap \"./\" to allow full paths to be used when setting breakpoints in binaries that have relative source paths." }, "sourceMap": { - "type": "array", - "description": "Specify an array of path remappings; each element must itself be a two element array containing a source and destination path name. Overrides sourcePath.", - "default": [] + "anyOf": [ + { + "type": "object", + "description": "Specify an object of path remappings; each entry has a key containing the source path and a value containing the destination path. E.g `{ \"/the/source/path\": \"/the/destination/path\" }`. 
Overrides sourcePath.", + "patternProperties": { + ".*": { + "type": "string" + } + }, + "default": {} + }, + { + "type": "array", + "description": "Specify an array of path remappings; each element must itself be a two element array containing a source and destination path name. Overrides sourcePath.", + "items": { + "type": "array", + "minItems": 2, + "maxItems": 2, + "items": { + "type": "string" + } + }, + "default": [] + } + ] }, "debuggerRoot": { "type": "string", @@ -319,9 +359,31 @@ "description": "Specify a source path to remap \"./\" to allow full paths to be used when setting breakpoints in binaries that have relative source paths." }, "sourceMap": { - "type": "array", - "description": "Specify an array of path remappings; each element must itself be a two element array containing a source and destination path name. Overrides sourcePath.", - "default": [] + "anyOf": [ + { + "type": "object", + "description": "Specify an object of path remappings; each entry has a key containing the source path and a value containing the destination path. E.g `{ \"/the/source/path\": \"/the/destination/path\" }`. Overrides sourcePath.", + "patternProperties": { + ".*": { + "type": "string" + } + }, + "default": {} + }, + { + "type": "array", + "description": "Specify an array of path remappings; each element must itself be a two element array containing a source and destination path name. Overrides sourcePath.", + "items": { + "type": "array", + "minItems": 2, + "maxItems": 2, + "items": { + "type": "string" + } + }, + "default": [] + } + ] }, "debuggerRoot": { "type": "string", @@ -451,4 +513,4 @@ } ] } -} +} \ No newline at end of file diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index 082b918c2598729..0dc0ed71858f84d 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -16,8 +16,8 @@ assistance. The lead maintainer is responsible for all parts of LLVM not covered by somebody else. 
-Chris Lattner \ -sabre@nondot.org (email), [lattner](https://github.com/lattner) (GitHub), clattner (Discourse) +Nikita Popov \ +llvm@npopov.com, npopov@redhat.com (email), [nikic](https://github.com/nikic) (GitHub), nikic (Discourse) ### Transforms and analyses @@ -426,6 +426,9 @@ help with the success of the project! ### Emeritus lead maintainers +Chris Lattner \ +sabre@nondot.org (email), [lattner](https://github.com/lattner) (GitHub), clattner (Discourse) + ### Inactive or former component maintainers Hans Wennborg (hans@chromium.org, [zmodem](https://github.com/zmodem)) -- Release management diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index 321bae48594cf97..4364029f71c6dc5 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -309,6 +309,8 @@ These variables provide fine control over the build of LLVM and enabled sub-projects. Nearly all of these variable names begin with ``LLVM_``. +.. _LLVM-related variables BUILD_SHARED_LIBS: + **BUILD_SHARED_LIBS**:BOOL Flag indicating if each LLVM component (e.g. Support) is built as a shared library (ON) or as a static library (OFF). Its default value is OFF. On diff --git a/llvm/docs/GettingStarted.rst b/llvm/docs/GettingStarted.rst index e03ae5effcdc445..8ef1f85d6b6fd7b 100644 --- a/llvm/docs/GettingStarted.rst +++ b/llvm/docs/GettingStarted.rst @@ -1085,67 +1085,89 @@ general questions about LLVM, please consult the `Frequently Asked Questions `_ page. If you are having problems with limited memory and build time, please try -building with ninja instead of make. Please consider configuring the +building with ``ninja`` instead of ``make``. Please consider configuring the following options with cmake: - * -G Ninja + * ``-G Ninja`` + Setting this option will allow you to build with ninja instead of make. Building with ninja significantly improves your build time, especially with incremental builds, and improves your memory usage. 
- * -DLLVM_USE_LINKER - Setting this option to lld will significantly reduce linking time for LLVM - executables on ELF-based platforms, such as Linux. If you are building LLVM + * ``-DLLVM_USE_LINKER`` + + Setting this option to ``lld`` will significantly reduce linking time for LLVM + executables, particularly on Linux and Windows. If you are building LLVM for the first time and lld is not available to you as a binary package, then you may want to use the gold linker as a faster alternative to GNU ld. - * -DCMAKE_BUILD_TYPE + * ``-DCMAKE_BUILD_TYPE`` + Controls optimization level and debug information of the build. This setting can affect RAM and disk usage, see :ref:`CMAKE_BUILD_TYPE ` for more information. - * -DLLVM_ENABLE_ASSERTIONS - This option defaults to ON for Debug builds and defaults to OFF for Release + * ``-DLLVM_ENABLE_ASSERTIONS`` + + This option defaults to ``ON`` for Debug builds and defaults to ``OFF`` for Release builds. As mentioned in the previous option, using the Release build type and enabling assertions may be a good alternative to using the Debug build type. - * -DLLVM_PARALLEL_LINK_JOBS + * ``-DLLVM_PARALLEL_LINK_JOBS`` + Set this equal to number of jobs you wish to run simultaneously. This is - similar to the -j option used with make, but only for link jobs. This option + similar to the ``-j`` option used with ``make``, but only for link jobs. This option can only be used with ninja. You may wish to use a very low number of jobs, as this will greatly reduce the amount of memory used during the build - process. If you have limited memory, you may wish to set this to 1. + process. If you have limited memory, you may wish to set this to ``1``. + + * ``-DLLVM_TARGETS_TO_BUILD`` - * -DLLVM_TARGETS_TO_BUILD Set this equal to the target you wish to build. You may wish to set this to - X86; however, you will find a full list of targets within the - llvm-project/llvm/lib/Target directory. + only your host architecture. 
For example ``X86`` if you are using an Intel or + AMD machine. You will find a full list of targets within the + `llvm-project/llvm/lib/Target `_ + directory. + + * ``-DLLVM_OPTIMIZED_TABLEGEN`` - * -DLLVM_OPTIMIZED_TABLEGEN - Set this to ON to generate a fully optimized tablegen during your build. This - will significantly improve your build time. This is only useful if you are - using the Debug build type. + Set this to ``ON`` to generate a fully optimized TableGen compiler during your + build, even if that build is a ``Debug`` build. This will significantly improve + your build time. You should not enable this if your intention is to debug the + TableGen compiler. - * -DLLVM_ENABLE_PROJECTS - Set this equal to the projects you wish to compile (e.g. clang, lld, etc.) If + * ``-DLLVM_ENABLE_PROJECTS`` + + Set this equal to the projects you wish to compile (e.g. ``clang``, ``lld``, etc.) If compiling more than one project, separate the items with a semicolon. Should you run into issues with the semicolon, try surrounding it with single quotes. - * -DLLVM_ENABLE_RUNTIMES - Set this equal to the runtimes you wish to compile (e.g. libcxx, libcxxabi, etc.) + * ``-DLLVM_ENABLE_RUNTIMES`` + + Set this equal to the runtimes you wish to compile (e.g. ``libcxx``, ``libcxxabi``, etc.) If compiling more than one runtime, separate the items with a semicolon. Should you run into issues with the semicolon, try surrounding it with single quotes. - * -DCLANG_ENABLE_STATIC_ANALYZER - Set this option to OFF if you do not require the clang static analyzer. This + * ``-DCLANG_ENABLE_STATIC_ANALYZER`` + + Set this option to ``OFF`` if you do not require the clang static analyzer. This should improve your build time slightly. - * -DLLVM_USE_SPLIT_DWARF - Consider setting this to ON if you require a debug build, as this will ease + * ``-DLLVM_USE_SPLIT_DWARF`` + + Consider setting this to ``ON`` if you require a debug build, as this will ease memory pressure on the linker. 
This will make linking much faster, as the - binaries will not contain any of the debug information; however, this will - generate the debug information in the form of a DWARF object file (with the - extension .dwo). This only applies to host platforms using ELF, such as Linux. + binaries will not contain any of the debug information. Instead the debug + information is in a separate DWARF object file (with the extension ``.dwo``). + This only applies to host platforms using ELF, such as Linux. + + * ``-DBUILD_SHARED_LIBS`` + + Setting this to ``ON`` will build shared libraries instead of static + libraries. This will ease memory pressure on the linker. However, this should + only be used when developing llvm. See + :ref:`BUILD_SHARED_LIBS ` + for more information. .. _links: diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 6fa35486669d69e..5527e4a8818a550 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -8047,6 +8047,43 @@ it will contain a list of ids, including the ids of the callsites in the full inline sequence, in order from the leaf-most call's id to the outermost inlined call. + +'``noalias.addrspace``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``noalias.addrspace`` metadata is used to identify memory +operations which cannot access objects allocated in a range of address +spaces. It is attached to memory instructions, including +:ref:`atomicrmw `, :ref:`cmpxchg `, and +:ref:`call ` instructions. + +This follows the same form as :ref:`range metadata `, +except the field entries must be of type `i32`. The interpretation is +the same numeric address spaces as applied to IR values. + +Example: + +.. code-block:: llvm + + ; %ptr cannot point to an object allocated in addrspace(5) + %rmw.valid = atomicrmw and ptr %ptr, i64 %value seq_cst, !noalias.addrspace !0 + + ; Undefined behavior. The underlying object is allocated in one of the listed + ; address spaces. 
+ %alloca = alloca i64, addrspace(5) + %alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr + %rmw.ub = atomicrmw and ptr %alloca.cast, i64 %value seq_cst, !noalias.addrspace !0 + + !0 = !{i32 5, i32 6} ; Exclude addrspace(5) only + + +This is intended for use on targets with a notion of generic address +spaces, which at runtime resolve to different physical memory +spaces. The interpretation of the address space values is target +specific. The behavior is undefined if the runtime memory address does +resolve to an object defined in one of the indicated address spaces. + + Module Flags Metadata ===================== diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index d22f642865bb3af..5736f3807f131b9 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -426,6 +426,20 @@ line. This currently applies to the following extensions: No extensions have experimental intrinsics. +Long (>32-bit) Instruction Support +================================== + +RISC-V is a variable-length ISA, but the standard currently only defines 16- and 32-bit instructions. The specification describes longer instruction encodings, but these are not ratified. + +The LLVM disassembler, `llvm-objdump`, does use the longer instruction encodings described in the specification to guess the instruction length (up to 176 bits) and will group the disassembly view of encoding bytes correspondingly. + +The LLVM integrated assembler for RISC-V supports two different kinds of ``.insn`` directive, for assembling instructions that LLVM does not yet support: + +* ``.insn type, args*`` which takes a known instruction type, and a list of fields. You are strongly recommended to use this variant of the directive if your instruction fits an existing instruction type. +* ``.insn [ length , ] encoding`` which takes an (optional) explicit length (in bytes) and a raw encoding for the instruction. 
When given an explicit length, this variant can encode instructions up to 64 bits long. The encoding part of the directive must be given all bits for the instruction, none are filled in for the user. When used without the optional length, this variant of the directive will use the LSBs of the raw encoding to work out if an instruction is 16 or 32 bits long. LLVM does not infer that an instruction might be longer than 32 bits - in this case, the user must give the length explicitly. + +It is strongly recommended to use the ``.insn`` directive for assembling unsupported instructions instead of ``.word`` or ``.hword``, because it will produce the correct mapping symbols to mark the word as an instruction, not data. + Global Pointer (GP) Relaxation and the Small Data Limit ======================================================= diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index f44d636a203374f..8ac5900a7e532ed 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -62,6 +62,8 @@ Changes to the LLVM IR * Added `usub_cond` and `usub_sat` operations to `atomicrmw`. +* Introduced `noalias.addrspace` metadata. + * Remove the following intrinsics which can be replaced with a `bitcast`: * `llvm.nvvm.bitcast.f2i` diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 89a85bc8a908640..2befacea4df866b 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -884,6 +884,9 @@ class TargetTransformInfo { bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const; + bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, + unsigned ScalarOpdIdx) const; + /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the demanded result elements need to be inserted and/or /// extracted from vectors. 
@@ -1935,6 +1938,8 @@ class TargetTransformInfo::Concept { virtual bool shouldBuildRelLookupTables() = 0; virtual bool useColdCCForColdCall(Function &F) = 0; virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) = 0; + virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, + unsigned ScalarOpdIdx) = 0; virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, @@ -2477,6 +2482,12 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) override { return Impl.isTargetIntrinsicTriviallyScalarizable(ID); } + + bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, + unsigned ScalarOpdIdx) override { + return Impl.isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx); + } + InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 50040dc8f6165be..01a16e7c7b1e59a 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -377,6 +377,11 @@ class TargetTransformInfoImplBase { return false; } + bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, + unsigned ScalarOpdIdx) const { + return false; + } + InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index bd929db33c4a2bb..a9f51fb925f5d5e 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -28,7 +28,20 @@ namespace llvm { class LLLexer { const char *CurPtr; StringRef CurBuf; - SMDiagnostic &ErrorInfo; + + enum class ErrorPriority { + None, // No error message present. + Parser, // Errors issued by parser. 
+ Lexer, // Errors issued by lexer. + }; + + struct ErrorInfo { + ErrorPriority Priority = ErrorPriority::None; + SMDiagnostic &Error; + + explicit ErrorInfo(SMDiagnostic &Error) : Error(Error) {} + } ErrorInfo; + SourceMgr &SM; LLVMContext &Context; @@ -66,8 +79,13 @@ namespace llvm { IgnoreColonInIdentifiers = val; } - bool Error(LocTy ErrorLoc, const Twine &Msg) const; - bool Error(const Twine &Msg) const { return Error(getLoc(), Msg); } + // This returns true as a convenience for the parser functions that return + // true on error. + bool ParseError(LocTy ErrorLoc, const Twine &Msg) { + Error(ErrorLoc, Msg, ErrorPriority::Parser); + return true; + } + bool ParseError(const Twine &Msg) { return ParseError(getLoc(), Msg); } void Warning(LocTy WarningLoc, const Twine &Msg) const; void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); } @@ -97,7 +115,15 @@ namespace llvm { uint64_t atoull(const char *Buffer, const char *End); uint64_t HexIntToVal(const char *Buffer, const char *End); void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]); - void FP80HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]); + void FP80HexToIntPair(const char *Buffer, const char *End, + uint64_t Pair[2]); + + void Error(LocTy ErrorLoc, const Twine &Msg, ErrorPriority Origin); + + void LexError(LocTy ErrorLoc, const Twine &Msg) { + Error(ErrorLoc, Msg, ErrorPriority::Lexer); + } + void LexError(const Twine &Msg) { LexError(getLoc(), Msg); } }; } // end namespace llvm diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 9576b935198dd4a..1ef8b8ffc396608 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -207,11 +207,11 @@ namespace llvm { LLVMContext &getContext() { return Context; } private: - bool error(LocTy L, const Twine &Msg) const { return Lex.Error(L, Msg); } - bool tokError(const Twine &Msg) const { return error(Lex.getLoc(), Msg); } + bool 
error(LocTy L, const Twine &Msg) { return Lex.ParseError(L, Msg); } + bool tokError(const Twine &Msg) { return error(Lex.getLoc(), Msg); } bool checkValueID(LocTy L, StringRef Kind, StringRef Prefix, - unsigned NextID, unsigned ID) const; + unsigned NextID, unsigned ID); /// Restore the internal name and slot mappings using the mappings that /// were created at an earlier parsing stage. diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index c36a346c1b2e054..57d1fa33c8482c9 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -793,6 +793,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return false; } + bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, + unsigned ScalarOpdIdx) const { + return false; + } + /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead. InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert, bool Extract, diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 5f4cc230a0f5ff1..df572e8791e13b4 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -52,3 +52,4 @@ LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37) LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38) LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39) LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40) +LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41) diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index a281c377f2601d5..66d8ca63a206f6a 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -26,6 +26,7 @@ #include "llvm/Support/Caching.h" #include "llvm/Support/Error.h" #include "llvm/Support/StringSaver.h" +#include "llvm/Support/ThreadPool.h" #include "llvm/Support/thread.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include 
"llvm/Transforms/IPO/FunctionImport.h" @@ -105,7 +106,6 @@ void updateMemProfAttributes(Module &Mod, const ModuleSummaryIndex &Index); class LTO; struct SymbolResolution; -class ThinBackendProc; /// An input file. This is a symbol table wrapper that only exposes the /// information that an LTO client should need in order to do symbol resolution. @@ -194,14 +194,90 @@ class InputFile { } }; -/// A ThinBackend defines what happens after the thin-link phase during ThinLTO. -/// The details of this type definition aren't important; clients can only -/// create a ThinBackend using one of the create*ThinBackend() functions below. -using ThinBackend = std::function( +using IndexWriteCallback = std::function; + +/// This class defines the interface to the ThinLTO backend. +class ThinBackendProc { +protected: + const Config &Conf; + ModuleSummaryIndex &CombinedIndex; + const DenseMap &ModuleToDefinedGVSummaries; + IndexWriteCallback OnWrite; + bool ShouldEmitImportsFiles; + DefaultThreadPool BackendThreadPool; + std::optional Err; + std::mutex ErrMu; + +public: + ThinBackendProc( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, + const DenseMap &ModuleToDefinedGVSummaries, + lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles, + ThreadPoolStrategy ThinLTOParallelism) + : Conf(Conf), CombinedIndex(CombinedIndex), + ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries), + OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles), + BackendThreadPool(ThinLTOParallelism) {} + + virtual ~ThinBackendProc() = default; + virtual Error start( + unsigned Task, BitcodeModule BM, + const FunctionImporter::ImportMapTy &ImportList, + const FunctionImporter::ExportSetTy &ExportList, + const std::map &ResolvedODR, + MapVector &ModuleMap) = 0; + Error wait() { + BackendThreadPool.wait(); + if (Err) + return std::move(*Err); + return Error::success(); + } + unsigned getThreadCount() { return BackendThreadPool.getMaxConcurrency(); } + virtual bool 
isSensitiveToInputOrder() { return false; } + + // Write sharded indices and (optionally) imports to disk + Error emitFiles(const FunctionImporter::ImportMapTy &ImportList, + llvm::StringRef ModulePath, + const std::string &NewModulePath) const; +}; + +/// This callable defines the behavior of a ThinLTO backend after the thin-link +/// phase. It accepts a configuration \p C, a combined module summary index +/// \p CombinedIndex, a map of module identifiers to global variable summaries +/// \p ModuleToDefinedGVSummaries, a function to add output streams \p +/// AddStream, and a file cache \p Cache. It returns a unique pointer to a +/// ThinBackendProc, which can be used to launch backends in parallel. +using ThinBackendFunction = std::function( const Config &C, ModuleSummaryIndex &CombinedIndex, - DenseMap &ModuleToDefinedGVSummaries, + const DenseMap &ModuleToDefinedGVSummaries, AddStreamFn AddStream, FileCache Cache)>; +/// This type defines the behavior following the thin-link phase during ThinLTO. +/// It encapsulates a backend function and a strategy for thread pool +/// parallelism. Clients should use one of the provided create*ThinBackend() +/// functions to instantiate a ThinBackend. Parallelism defines the thread pool +/// strategy to be used for processing. 
+struct ThinBackend { + ThinBackend(ThinBackendFunction Func, ThreadPoolStrategy Parallelism) + : Func(std::move(Func)), Parallelism(std::move(Parallelism)) {} + ThinBackend() = default; + + std::unique_ptr operator()( + const Config &Conf, ModuleSummaryIndex &CombinedIndex, + const DenseMap &ModuleToDefinedGVSummaries, + AddStreamFn AddStream, FileCache Cache) { + assert(isValid() && "Invalid backend function"); + return Func(Conf, CombinedIndex, ModuleToDefinedGVSummaries, + std::move(AddStream), std::move(Cache)); + } + ThreadPoolStrategy getParallelism() const { return Parallelism; } + bool isValid() const { return static_cast(Func); } + +private: + ThinBackendFunction Func = nullptr; + ThreadPoolStrategy Parallelism; +}; + /// This ThinBackend runs the individual backend jobs in-process. /// The default value means to use one job per hardware core (not hyper-thread). /// OnWrite is callback which receives module identifier and notifies LTO user @@ -210,7 +286,6 @@ using ThinBackend = std::function( /// to the same path as the input module, with suffix ".thinlto.bc" /// ShouldEmitImportsFiles is true it also writes a list of imported files to a /// similar path with ".imports" appended instead. -using IndexWriteCallback = std::function; ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism, IndexWriteCallback OnWrite = nullptr, bool ShouldEmitIndexFiles = false, @@ -231,7 +306,8 @@ ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism, /// the objects with NativeObjectPrefix instead of NewPrefix. OnWrite is /// callback which receives module identifier and notifies LTO user that index /// file for the module (and optionally imports file) was created. 
-ThinBackend createWriteIndexesThinBackend(std::string OldPrefix, +ThinBackend createWriteIndexesThinBackend(ThreadPoolStrategy Parallelism, + std::string OldPrefix, std::string NewPrefix, std::string NativeObjectPrefix, bool ShouldEmitImportsFiles, @@ -275,7 +351,7 @@ class LTO { /// this constructor. /// FIXME: We do currently require the DiagHandler field to be set in Conf. /// Until that is fixed, a Config argument is required. - LTO(Config Conf, ThinBackend Backend = nullptr, + LTO(Config Conf, ThinBackend Backend = {}, unsigned ParallelCodeGenParallelismLevel = 1, LTOKind LTOMode = LTOK_Default); ~LTO(); diff --git a/llvm/include/llvm/SandboxIR/Utils.h b/llvm/include/llvm/SandboxIR/Utils.h index e50621b4c1228e1..e4156c6af9a2208 100644 --- a/llvm/include/llvm/SandboxIR/Utils.h +++ b/llvm/include/llvm/SandboxIR/Utils.h @@ -71,9 +71,9 @@ class Utils { /// \Returns the gap between the memory locations accessed by \p I0 and /// \p I1 in bytes. template - static std::optional - getPointerDiffInBytes(LoadOrStoreT *I0, LoadOrStoreT *I1, ScalarEvolution &SE, - const DataLayout &DL) { + static std::optional getPointerDiffInBytes(LoadOrStoreT *I0, + LoadOrStoreT *I1, + ScalarEvolution &SE) { static_assert(std::is_same_v || std::is_same_v, "Expected sandboxir::Load or sandboxir::Store!"); @@ -84,8 +84,8 @@ class Utils { if (Ptr0 != Ptr1) return false; llvm::Type *ElemTy = llvm::Type::getInt8Ty(SE.getContext()); - return getPointersDiff(ElemTy, Opnd0, ElemTy, Opnd1, DL, SE, - /*StrictCheck=*/false, /*CheckType=*/false); + return getPointersDiff(ElemTy, Opnd0, ElemTy, Opnd1, I0->getDataLayout(), + SE, /*StrictCheck=*/false, /*CheckType=*/false); } /// \Returns true if \p I0 accesses a memory location lower than \p I1. @@ -93,8 +93,8 @@ class Utils { /// locations are equal, or if I1 accesses a memory location greater than I0. 
template static bool atLowerAddress(LoadOrStoreT *I0, LoadOrStoreT *I1, - ScalarEvolution &SE, const DataLayout &DL) { - auto Diff = getPointerDiffInBytes(I0, I1, SE, DL); + ScalarEvolution &SE) { + auto Diff = getPointerDiffInBytes(I0, I1, SE); if (!Diff) return false; return *Diff > 0; diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h index ba6c531ab4db214..d8e2cb0514ddd7a 100644 --- a/llvm/include/llvm/Support/Threading.h +++ b/llvm/include/llvm/Support/Threading.h @@ -188,6 +188,18 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; } return S; } + /// Like hardware_concurrency() above, but builds a strategy + /// based on the rules described for get_threadpool_strategy(). + /// If \p Num is invalid, returns a default strategy where one thread per + /// hardware core is used. + inline ThreadPoolStrategy hardware_concurrency(StringRef Num) { + std::optional S = + get_threadpool_strategy(Num, hardware_concurrency()); + if (S) + return *S; + return hardware_concurrency(); + } + /// Returns an optimal thread strategy to execute specified amount of tasks. /// This strategy should prevent us from creating too many threads if we /// occasionaly have an unexpectedly small amount of tasks. diff --git a/llvm/include/llvm/Support/TimeProfiler.h b/llvm/include/llvm/Support/TimeProfiler.h index 9e2ba31991f5422..8c0ecf9b7c911f2 100644 --- a/llvm/include/llvm/Support/TimeProfiler.h +++ b/llvm/include/llvm/Support/TimeProfiler.h @@ -83,6 +83,19 @@ namespace llvm { class raw_pwrite_stream; +// Type of the time trace event. +enum class TimeTraceEventType { + // Complete events have a duration (start and end time points) and are marked + // by the "X" phase type. + CompleteEvent, + // Instant events don't have a duration, they happen at an instant in time. + // They are marked with "i" phase type. The field End is ignored for them. 
+ InstantEvent, + // Async events mark asynchronous operations and are specified by the "b" + // (start) and "e" (end) phase types. + AsyncEvent +}; + struct TimeTraceMetadata { std::string Detail; // Source file and line number information for the event. @@ -152,6 +165,10 @@ timeTraceProfilerBegin(StringRef Name, TimeTraceProfilerEntry *timeTraceAsyncProfilerBegin(StringRef Name, StringRef Detail); +// Mark an instant event. +void timeTraceAddInstantEvent(StringRef Name, + llvm::function_ref Detail); + /// Manually end the last time section. void timeTraceProfilerEnd(); void timeTraceProfilerEnd(TimeTraceProfilerEntry *E); diff --git a/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc b/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc index 5c6c3475ed910ba..bb1875fe9f72c8d 100644 --- a/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc +++ b/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc @@ -70,7 +70,7 @@ enum CPUFeatures { FEAT_MEMTAG3, FEAT_SB, FEAT_PREDRES, - FEAT_SSBS, + RESERVED_FEAT_SSBS, // previously used and now ABI legacy FEAT_SSBS2, FEAT_BTI, RESERVED_FEAT_LS64, // previously used and now ABI legacy diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h index 4b29d3f40ab7b5b..3623f9194d4d133 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h @@ -417,9 +417,9 @@ void gatherImportedSummariesForModule( GVSummaryPtrSet &DecSummaries); /// Emit into \p OutputFilename the files module \p ModulePath will import from. 
-std::error_code -EmitImportsFiles(StringRef ModulePath, StringRef OutputFilename, - const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex); +Error EmitImportsFiles( + StringRef ModulePath, StringRef OutputFilename, + const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex); /// Based on the information recorded in the summaries during global /// summary-based analysis: diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h new file mode 100644 index 000000000000000..460e3f675fa7973 --- /dev/null +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h @@ -0,0 +1,127 @@ +//===- SeedCollector.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file contains the mechanism for collecting the seed instructions that +// are used as starting points for forming the vectorization graph. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H +#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/SandboxIR/Instruction.h" +#include "llvm/SandboxIR/Utils.h" +#include "llvm/SandboxIR/Value.h" +#include +#include + +namespace llvm::sandboxir { + +/// A set of candidate Instructions for vectorizing together. +class SeedBundle { +public: + /// Initialize a bundle with \p I. 
+ explicit SeedBundle(Instruction *I) { insertAt(begin(), I); } + explicit SeedBundle(SmallVector &&L) : Seeds(std::move(L)) { + for (auto &S : Seeds) + NumUnusedBits += Utils::getNumBits(S); + } + /// No need to allow copies. + SeedBundle(const SeedBundle &) = delete; + SeedBundle &operator=(const SeedBundle &) = delete; + virtual ~SeedBundle() {} + + using iterator = SmallVector::iterator; + using const_iterator = SmallVector::const_iterator; + iterator begin() { return Seeds.begin(); } + iterator end() { return Seeds.end(); } + const_iterator begin() const { return Seeds.begin(); } + const_iterator end() const { return Seeds.end(); } + + Instruction *operator[](unsigned Idx) const { return Seeds[Idx]; } + + /// Insert \p I into position \p P. Clients should choose Pos + /// by symbol, symbol-offset, and program order (which depends if scheduling + /// bottom-up or top-down). + void insertAt(iterator Pos, Instruction *I) { + Seeds.insert(Pos, I); + NumUnusedBits += Utils::getNumBits(I); + } + + unsigned getFirstUnusedElementIdx() const { + for (unsigned ElmIdx : seq(0, Seeds.size())) + if (!isUsed(ElmIdx)) + return ElmIdx; + return Seeds.size(); + } + /// Marks instruction \p I "used" within the bundle. Clients + /// use this property when assembling a vectorized instruction from + /// the seeds in a bundle. This allows constant time evaluation + /// and "removal" from the list. 
+ void setUsed(Instruction *I) { + auto It = std::find(begin(), end(), I); + assert(It != end() && "Instruction not in the bundle!"); + auto Idx = It - begin(); + setUsed(Idx, 1, /*VerifyUnused=*/false); + } + + void setUsed(unsigned ElementIdx, unsigned Sz = 1, bool VerifyUnused = true) { + if (ElementIdx + Sz >= UsedLanes.size()) + UsedLanes.resize(ElementIdx + Sz); + for (unsigned Idx : seq(ElementIdx, ElementIdx + Sz)) { + assert((!VerifyUnused || !UsedLanes.test(Idx)) && + "Already marked as used!"); + UsedLanes.set(Idx); + UsedLaneCount++; + } + NumUnusedBits -= Utils::getNumBits(Seeds[ElementIdx]); + } + /// \Returns whether or not \p Element has been used. + bool isUsed(unsigned Element) const { + return Element < UsedLanes.size() && UsedLanes.test(Element); + } + bool allUsed() const { return UsedLaneCount == Seeds.size(); } + unsigned getNumUnusedBits() const { return NumUnusedBits; } + + /// \Returns a slice of seed elements, starting at the element \p StartIdx, + /// with a total size <= \p MaxVecRegBits, or an empty slice if the + /// requirements cannot be met . If \p ForcePowOf2 is true, then the returned + /// slice will have a total number of bits that is a power of 2. + MutableArrayRef + getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowOf2); + +protected: + SmallVector Seeds; + /// The lanes that we have already vectorized. + BitVector UsedLanes; + /// Tracks used lanes for constant-time accessor. + unsigned UsedLaneCount = 0; + /// Tracks the remaining bits available to vectorize + unsigned NumUnusedBits = 0; + +public: +#ifndef NDEBUG + void dump(raw_ostream &OS) const { + for (auto [ElmIdx, I] : enumerate(*this)) { + OS.indent(2) << ElmIdx << ". 
"; + if (isUsed(ElmIdx)) + OS << "[USED]"; + else + OS << *I; + OS << "\n"; + } + } + LLVM_DUMP_METHOD void dump() const { + dump(dbgs()); + dbgs() << "\n"; + } +#endif // NDEBUG +}; +} // namespace llvm::sandboxir +#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index f471c0db11d3ef2..1dcdad01f4c8093 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -219,10 +219,8 @@ bool EarliestEscapeInfo::isNotCapturedBefore(const Value *Object, Instruction *EarliestCapture = FindEarliestCapture( Object, *const_cast(DT.getRoot()->getParent()), /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT); - if (EarliestCapture) { - auto Ins = Inst2Obj.insert({EarliestCapture, {}}); - Ins.first->second.push_back(Object); - } + if (EarliestCapture) + Inst2Obj[EarliestCapture].push_back(Object); Iter.first->second = EarliestCapture; } diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index a7a6de3f3b97b0e..a6ef271da11f1af 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -145,7 +145,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { // If this is a scalar -> vector cast, convert the input into a <1 x scalar> // vector so the code below can handle it uniformly. - if (isa(C) || isa(C)) { + if (!isa(C->getType()) && + (isa(C) || isa(C))) { Constant *Ops = C; // don't take the address of C! 
return FoldBitCast(ConstantVector::get(Ops), DestTy, DL); } diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index fe00ea0097a43ab..d35bf6818d43790 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -512,10 +512,8 @@ void RuntimePointerChecking::groupChecks( unsigned TotalComparisons = 0; DenseMap> PositionMap; - for (unsigned Index = 0; Index < Pointers.size(); ++Index) { - auto [It, _] = PositionMap.insert({Pointers[Index].PointerValue, {}}); - It->second.push_back(Index); - } + for (unsigned Index = 0; Index < Pointers.size(); ++Index) + PositionMap[Pointers[Index].PointerValue].push_back(Index); // We need to keep track of what pointers we've already seen so we // don't process them twice. diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index c939270ed39a654..3d890f05c8ca210 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1564,8 +1564,7 @@ static void insertFoldCacheEntry( UserIDs.pop_back(); I.first->second = S; } - auto R = FoldCacheUser.insert({S, {}}); - R.first->second.push_back(ID); + FoldCacheUser[S].push_back(ID); } const SCEV * diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index b5195f764cbd1c3..b612a3331e5737d 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -592,6 +592,11 @@ bool TargetTransformInfo::isTargetIntrinsicTriviallyScalarizable( return TTIImpl->isTargetIntrinsicTriviallyScalarizable(ID); } +bool TargetTransformInfo::isTargetIntrinsicWithScalarOpAtArg( + Intrinsic::ID ID, unsigned ScalarOpdIdx) const { + return TTIImpl->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx); +} + InstructionCost TargetTransformInfo::getScalarizationOverhead( VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind) 
const { diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index a3e47da77fe7764..f71cbe1b4b1e887 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -25,9 +25,21 @@ using namespace llvm; -bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const { - ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg); - return true; +// Both the lexer and parser can issue error messages. If the lexer issues a +// lexer error, since we do not terminate execution immediately, usually that +// is followed by the parser issuing a parser error. However, the error issued +// by the lexer is more relevant in that case as opposed to potentially more +// generic parser error. So instead of always recording the last error message +// use the `Priority` to establish a priority, with Lexer > Parser > None. We +// record the issued message only if the message has same or higher priority +// than the existing one. This prevents lexer errors from being overwritten by +// parser errors. +void LLLexer::Error(LocTy ErrorLoc, const Twine &Msg, + LLLexer::ErrorPriority Priority) { + if (Priority < ErrorInfo.Priority) + return; + ErrorInfo.Error = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg); + ErrorInfo.Priority = Priority; } void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const { @@ -49,7 +61,7 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) { Result *= 10; Result += *Buffer-'0'; if (Result < OldRes) { // Uh, oh, overflow detected!!! - Error("constant bigger than 64 bits detected!"); + LexError("constant bigger than 64 bits detected!"); return 0; } } @@ -64,7 +76,7 @@ uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { Result += hexDigitValue(*Buffer); if (Result < OldRes) { // Uh, oh, overflow detected!!! 
- Error("constant bigger than 64 bits detected!"); + LexError("constant bigger than 64 bits detected!"); return 0; } } @@ -87,7 +99,7 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End, Pair[1] += hexDigitValue(*Buffer); } if (Buffer != End) - Error("constant bigger than 128 bits detected!"); + LexError("constant bigger than 128 bits detected!"); } /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into @@ -106,7 +118,7 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, Pair[0] += hexDigitValue(*Buffer); } if (Buffer != End) - Error("constant bigger than 128 bits detected!"); + LexError("constant bigger than 128 bits detected!"); } // UnEscapeLexed - Run through the specified buffer and change \xx codes to the @@ -273,14 +285,14 @@ lltok::Kind LLLexer::LexDollar() { int CurChar = getNextChar(); if (CurChar == EOF) { - Error("end of file in COMDAT variable name"); + LexError("end of file in COMDAT variable name"); return lltok::Error; } if (CurChar == '"') { StrVal.assign(TokStart + 2, CurPtr - 1); UnEscapeLexed(StrVal); if (StringRef(StrVal).contains(0)) { - Error("Null bytes are not allowed in names"); + LexError("Null bytes are not allowed in names"); return lltok::Error; } return lltok::ComdatVar; @@ -302,7 +314,7 @@ lltok::Kind LLLexer::ReadString(lltok::Kind kind) { int CurChar = getNextChar(); if (CurChar == EOF) { - Error("end of file in string constant"); + LexError("end of file in string constant"); return lltok::Error; } if (CurChar == '"') { @@ -342,7 +354,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { uint64_t Val = atoull(TokStart + 1, CurPtr); if ((unsigned)Val != Val) - Error("invalid value number (too large)!"); + LexError("invalid value number (too large)!"); UIntVal = unsigned(Val); return Token; } @@ -356,14 +368,14 @@ lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { int CurChar = getNextChar(); if (CurChar == EOF) { - Error("end of file in global variable name"); + 
LexError("end of file in global variable name"); return lltok::Error; } if (CurChar == '"') { StrVal.assign(TokStart+2, CurPtr-1); UnEscapeLexed(StrVal); if (StringRef(StrVal).contains(0)) { - Error("Null bytes are not allowed in names"); + LexError("Null bytes are not allowed in names"); return lltok::Error; } return Var; @@ -398,7 +410,7 @@ lltok::Kind LLLexer::LexQuote() { if (CurPtr[0] == ':') { ++CurPtr; if (StringRef(StrVal).contains(0)) { - Error("Null bytes are not allowed in names"); + LexError("Null bytes are not allowed in names"); kind = lltok::Error; } else { kind = lltok::LabelStr; @@ -480,7 +492,7 @@ lltok::Kind LLLexer::LexIdentifier() { uint64_t NumBits = atoull(StartChar, CurPtr); if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) { - Error("bitwidth for integer type out of range!"); + LexError("bitwidth for integer type out of range!"); return lltok::Error; } TyVal = IntegerType::get(Context, NumBits); @@ -1109,7 +1121,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { uint64_t Val = atoull(TokStart, CurPtr); ++CurPtr; // Skip the colon. 
if ((unsigned)Val != Val) - Error("invalid value number (too large)!"); + LexError("invalid value number (too large)!"); UIntVal = unsigned(Val); return lltok::LabelID; } diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index d84521d2e6e10db..6450d8b109063a2 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -3220,7 +3220,7 @@ bool LLParser::parseOptionalOperandBundles( } bool LLParser::checkValueID(LocTy Loc, StringRef Kind, StringRef Prefix, - unsigned NextID, unsigned ID) const { + unsigned NextID, unsigned ID) { if (ID < NextID) return error(Loc, Kind + " expected to be numbered '" + Prefix + Twine(NextID) + "' or greater"); @@ -5192,7 +5192,7 @@ bool LLParser::parseDILocation(MDNode *&Result, bool IsDistinct) { /// ::= distinct !DIAssignID() bool LLParser::parseDIAssignID(MDNode *&Result, bool IsDistinct) { if (!IsDistinct) - return Lex.Error("missing 'distinct', required for !DIAssignID()"); + return tokError("missing 'distinct', required for !DIAssignID()"); Lex.Lex(); @@ -5500,7 +5500,7 @@ bool LLParser::parseDIFile(MDNode *&Result, bool IsDistinct) { if (checksumkind.Seen && checksum.Seen) OptChecksum.emplace(checksumkind.Val, checksum.Val); else if (checksumkind.Seen || checksum.Seen) - return Lex.Error("'checksumkind' and 'checksum' must be provided together"); + return tokError("'checksumkind' and 'checksum' must be provided together"); MDString *Source = nullptr; if (source.Seen) @@ -5519,7 +5519,7 @@ bool LLParser::parseDIFile(MDNode *&Result, bool IsDistinct) { /// sysroot: "/", sdk: "MacOSX.sdk") bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) { if (!IsDistinct) - return Lex.Error("missing 'distinct', required for !DICompileUnit"); + return tokError("missing 'distinct', required for !DICompileUnit"); #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ REQUIRED(language, DwarfLangField, ); \ @@ -5599,7 +5599,7 @@ bool LLParser::parseDISubprogram(MDNode *&Result, bool 
IsDistinct) { : DISubprogram::toSPFlags(isLocal.Val, isDefinition.Val, isOptimized.Val, virtuality.Val); if ((SPFlags & DISubprogram::SPFlagDefinition) && !IsDistinct) - return Lex.Error( + return error( Loc, "missing 'distinct', required for !DISubprogram that is a Definition"); Result = GET_OR_DISTINCT( @@ -7952,10 +7952,10 @@ bool LLParser::parseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { // array constant. if (CT == LandingPadInst::Catch) { if (isa(V->getType())) - error(VLoc, "'catch' clause has an invalid type"); + return error(VLoc, "'catch' clause has an invalid type"); } else { if (!isa(V->getType())) - error(VLoc, "'filter' clause has an invalid type"); + return error(VLoc, "'filter' clause has an invalid type"); } Constant *CV = dyn_cast(V); @@ -8639,7 +8639,7 @@ bool LLParser::parseUseListOrderIndexes(SmallVectorImpl &Indexes) { if (parseToken(lltok::lbrace, "expected '{' here")) return true; if (Lex.getKind() == lltok::rbrace) - return Lex.Error("expected non-empty list of uselistorder indexes"); + return tokError("expected non-empty list of uselistorder indexes"); // Use Offset, Max, and IsOrdered to check consistency of indexes. The // indexes should be distinct numbers in the range [0, size-1], and should diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index 2b97fa449f08f73..8d9813edd7e52ac 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -122,18 +122,6 @@ class SSAIfConv { /// The branch condition determined by analyzeBranch. 
SmallVector Cond; - class PredicationStrategyBase { - public: - virtual bool canConvertIf(MachineBasicBlock *Tail) { return true; } - virtual bool canPredicateInstr(const MachineInstr &I) = 0; - virtual void predicateBlock(MachineBasicBlock *MBB, - ArrayRef Cond, - bool Reverse) = 0; - virtual ~PredicationStrategyBase() = default; - }; - - PredicationStrategyBase &Predicate; - private: /// Instructions in Head that define values used by the conditional blocks. /// The hoisted instructions must be inserted after these instructions. @@ -149,6 +137,10 @@ class SSAIfConv { /// and FBB. MachineBasicBlock::iterator InsertionPoint; + /// Return true if all non-terminator instructions in MBB can be safely + /// speculated. + bool canSpeculateInstrs(MachineBasicBlock *MBB); + /// Return true if all non-terminator instructions in MBB can be safely /// predicated. bool canPredicateInstrs(MachineBasicBlock *MBB); @@ -157,6 +149,10 @@ class SSAIfConv { /// Return false if any dependency is incompatible with if conversion. bool InstrDependenciesAllowIfConv(MachineInstr *I); + /// Predicate all instructions of the basic block with current condition + /// except for terminators. Reverse the condition if ReversePredicate is set. + void PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate); + /// Find a valid insertion point in Head. bool findInsertionPoint(); @@ -167,8 +163,8 @@ class SSAIfConv { void rewritePHIOperands(); public: - SSAIfConv(PredicationStrategyBase &Predicate, MachineFunction &MF) - : Predicate(Predicate) { + /// init - Initialize per-function data structures. + void init(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); @@ -180,14 +176,76 @@ class SSAIfConv { /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, /// initialize the internal state, and return true. 
- bool canConvertIf(MachineBasicBlock *MBB); + /// If predicate is set try to predicate the block otherwise try to + /// speculatively execute it. + bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false); /// convertIf - If-convert the last block passed to canConvertIf(), assuming /// it is possible. Add any blocks that are to be erased to RemoveBlocks. - void convertIf(SmallVectorImpl &RemoveBlocks); + void convertIf(SmallVectorImpl &RemoveBlocks, + bool Predicate = false); }; } // end anonymous namespace +/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely +/// be speculated. The terminators are not considered. +/// +/// If instructions use any values that are defined in the head basic block, +/// the defining instructions are added to InsertAfter. +/// +/// Any clobbered regunits are added to ClobberedRegUnits. +/// +bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { + // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to + // get right. + if (!MBB->livein_empty()) { + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); + return false; + } + + unsigned InstrCount = 0; + + // Check all instructions, except the terminators. It is assumed that + // terminators never have side effects or define any used register values. + for (MachineInstr &MI : + llvm::make_range(MBB->begin(), MBB->getFirstTerminator())) { + if (MI.isDebugInstr()) + continue; + + if (++InstrCount > BlockInstrLimit && !Stress) { + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " + << BlockInstrLimit << " instructions.\n"); + return false; + } + + // There shouldn't normally be any phis in a single-predecessor block. + if (MI.isPHI()) { + LLVM_DEBUG(dbgs() << "Can't hoist: " << MI); + return false; + } + + // Don't speculate loads. Note that it may be possible and desirable to + // speculate GOT or constant pool loads that are guaranteed not to trap, + // but we don't support that for now. 
+ if (MI.mayLoad()) { + LLVM_DEBUG(dbgs() << "Won't speculate load: " << MI); + return false; + } + + // We never speculate stores, so an AA pointer isn't necessary. + bool DontMoveAcrossStore = true; + if (!MI.isSafeToMove(DontMoveAcrossStore)) { + LLVM_DEBUG(dbgs() << "Can't speculate: " << MI); + return false; + } + + // Check for any dependencies on Head instructions. + if (!InstrDependenciesAllowIfConv(&MI)) + return false; + } + return true; +} + /// Check that there is no dependencies preventing if conversion. /// /// If instruction uses any values that are defined in the head basic block, @@ -261,8 +319,17 @@ bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) { return false; } - if (!Predicate.canPredicateInstr(*I)) + // Check that instruction is predicable + if (!TII->isPredicable(*I)) { + LLVM_DEBUG(dbgs() << "Isn't predicable: " << *I); + return false; + } + + // Check that instruction is not already predicated. + if (TII->isPredicated(*I) && !TII->canPredicatePredicatedInstr(*I)) { + LLVM_DEBUG(dbgs() << "Is already predicated: " << *I); return false; + } // Check for any dependencies on Head instructions. if (!InstrDependenciesAllowIfConv(&(*I))) @@ -271,6 +338,24 @@ bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) { return true; } +// Apply predicate to all instructions in the machine block. +void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) { + auto Condition = Cond; + if (ReversePredicate) { + bool CanRevCond = !TII->reverseBranchCondition(Condition); + assert(CanRevCond && "Reversed predicate is not supported"); + (void)CanRevCond; + } + // Terminators don't need to be predicated as they will be removed. + for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); + I != E; ++I) { + if (I->isDebugInstr()) + continue; + TII->PredicateInstruction(*I, Condition); + } +} + /// Find an insertion point in Head for the speculated instructions. 
The /// insertion point must be: /// @@ -349,7 +434,7 @@ bool SSAIfConv::findInsertionPoint() { /// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is /// a potential candidate for if-conversion. Fill out the internal state. /// -bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { +bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { Head = MBB; TBB = FBB = Tail = nullptr; @@ -389,6 +474,14 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { << printMBBReference(*Tail) << '\n'); } + // This is a triangle or a diamond. + // Skip if we cannot predicate and there are no phis skip as there must be + // side effects that can only be handled with predication. + if (!Predicate && (Tail->empty() || !Tail->front().isPHI())) { + LLVM_DEBUG(dbgs() << "No phis in tail.\n"); + return false; + } + // The branch we're looking to eliminate must be analyzable. Cond.clear(); if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) { @@ -396,10 +489,6 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { return false; } - if (!Predicate.canConvertIf(Tail)) { - return false; - } - // This is weird, probably some sort of degenerate CFG. if (!TBB) { LLVM_DEBUG(dbgs() << "analyzeBranch didn't find conditional branch.\n"); @@ -447,9 +536,17 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { // Check that the conditional instructions can be speculated. InsertAfter.clear(); ClobberedRegUnits.reset(); - for (MachineBasicBlock *MBB : {TBB, FBB}) - if (MBB != Tail && !canPredicateInstrs(MBB)) + if (Predicate) { + if (TBB != Tail && !canPredicateInstrs(TBB)) + return false; + if (FBB != Tail && !canPredicateInstrs(FBB)) + return false; + } else { + if (TBB != Tail && !canSpeculateInstrs(TBB)) return false; + if (FBB != Tail && !canSpeculateInstrs(FBB)) + return false; + } // Try to find a valid insertion point for the speculated instructions in the // head basic block. 
@@ -582,7 +679,8 @@ void SSAIfConv::rewritePHIOperands() { /// /// Any basic blocks that need to be erased will be added to RemoveBlocks. /// -void SSAIfConv::convertIf(SmallVectorImpl &RemoveBlocks) { +void SSAIfConv::convertIf(SmallVectorImpl &RemoveBlocks, + bool Predicate) { assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); // Update statistics. @@ -592,15 +690,16 @@ void SSAIfConv::convertIf(SmallVectorImpl &RemoveBlocks) { ++NumDiamondsConv; // Move all instructions into Head, except for the terminators. - for (MachineBasicBlock *MBB : {TBB, FBB}) { - if (MBB != Tail) { - // reverse the condition for the false bb - Predicate.predicateBlock(MBB, Cond, MBB == FBB); - Head->splice(InsertionPoint, MBB, MBB->begin(), - MBB->getFirstTerminator()); - } + if (TBB != Tail) { + if (Predicate) + PredicateBlock(TBB, /*ReversePredicate=*/false); + Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); + } + if (FBB != Tail) { + if (Predicate) + PredicateBlock(FBB, /*ReversePredicate=*/true); + Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); } - // Are there extra Tail predecessors? 
bool ExtraPreds = Tail->pred_size() != 2; if (ExtraPreds) @@ -670,6 +769,7 @@ class EarlyIfConverter : public MachineFunctionPass { MachineLoopInfo *Loops = nullptr; MachineTraceMetrics *Traces = nullptr; MachineTraceMetrics::Ensemble *MinInstr = nullptr; + SSAIfConv IfConv; public: static char ID; @@ -679,9 +779,9 @@ class EarlyIfConverter : public MachineFunctionPass { StringRef getPassName() const override { return "Early If-Conversion"; } private: - bool tryConvertIf(SSAIfConv &IfConv, MachineBasicBlock *); - void invalidateTraces(SSAIfConv &IfConv); - bool shouldConvertIf(SSAIfConv &IfConv); + bool tryConvertIf(MachineBasicBlock *); + void invalidateTraces(); + bool shouldConvertIf(); }; } // end anonymous namespace @@ -737,7 +837,7 @@ void updateLoops(MachineLoopInfo *Loops, } // namespace /// Invalidate MachineTraceMetrics before if-conversion. -void EarlyIfConverter::invalidateTraces(SSAIfConv &IfConv) { +void EarlyIfConverter::invalidateTraces() { Traces->verifyAnalysis(); Traces->invalidate(IfConv.Head); Traces->invalidate(IfConv.Tail); @@ -764,50 +864,10 @@ template Remark &operator<<(Remark &R, Cycles C) { } } // anonymous namespace -class SpeculateStrategy : public SSAIfConv::PredicationStrategyBase { -public: - bool canConvertIf(MachineBasicBlock *Tail) override { - // This is a triangle or a diamond. - // Skip if we cannot predicate and there are no phis skip as there must - // be side effects that can only be handled with predication. - if (Tail->empty() || !Tail->front().isPHI()) { - LLVM_DEBUG(dbgs() << "No phis in tail.\n"); - return false; - } - return true; - } - - bool canPredicateInstr(const MachineInstr &I) override { - // Don't speculate loads. Note that it may be possible and desirable to - // speculate GOT or constant pool loads that are guaranteed not to trap, - // but we don't support that for now. 
- if (I.mayLoad()) { - LLVM_DEBUG(dbgs() << "Won't speculate load: " << I); - return false; - } - - // We never speculate stores, so an AA pointer isn't necessary. - bool DontMoveAcrossStore = true; - if (!I.isSafeToMove(DontMoveAcrossStore)) { - LLVM_DEBUG(dbgs() << "Can't speculate: " << I); - return false; - } - return true; - } - - void predicateBlock(MachineBasicBlock *MBB, ArrayRef Cond, - bool Reverse) - override { /* do nothing, everything is speculatable and it's valid to - move the instructions into the head */ - } - - ~SpeculateStrategy() override = default; -}; - /// Apply cost model and heuristics to the if-conversion in IfConv. /// Return true if the conversion is a good idea. /// -bool EarlyIfConverter::shouldConvertIf(SSAIfConv &IfConv) { +bool EarlyIfConverter::shouldConvertIf() { // Stress testing mode disables all cost considerations. if (Stress) return true; @@ -1000,11 +1060,11 @@ bool EarlyIfConverter::shouldConvertIf(SSAIfConv &IfConv) { /// Attempt repeated if-conversion on MBB, return true if successful. /// -bool EarlyIfConverter::tryConvertIf(SSAIfConv &IfConv, MachineBasicBlock *MBB) { +bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool Changed = false; - while (IfConv.canConvertIf(MBB) && shouldConvertIf(IfConv)) { + while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { // If-convert MBB and update analyses. - invalidateTraces(IfConv); + invalidateTraces(); SmallVector RemoveBlocks; IfConv.convertIf(RemoveBlocks); Changed = true; @@ -1037,15 +1097,14 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { MinInstr = nullptr; bool Changed = false; - SpeculateStrategy Speculate; - SSAIfConv IfConv(Speculate, MF); + IfConv.init(MF); // Visit blocks in dominator tree post-order. The post-order enables nested // if-conversion in a single pass. The tryConvertIf() function may erase // blocks, but only blocks dominated by the head block. 
This makes it safe to // update the dominator tree while the post-order iterator is still active. for (auto *DomNode : post_order(DomTree)) - if (tryConvertIf(IfConv, DomNode->getBlock())) + if (tryConvertIf(DomNode->getBlock())) Changed = true; return Changed; @@ -1064,6 +1123,7 @@ class EarlyIfPredicator : public MachineFunctionPass { MachineDominatorTree *DomTree = nullptr; MachineBranchProbabilityInfo *MBPI = nullptr; MachineLoopInfo *Loops = nullptr; + SSAIfConv IfConv; public: static char ID; @@ -1073,8 +1133,8 @@ class EarlyIfPredicator : public MachineFunctionPass { StringRef getPassName() const override { return "Early If-predicator"; } protected: - bool tryConvertIf(SSAIfConv &IfConv, MachineBasicBlock *); - bool shouldConvertIf(SSAIfConv &IfConv); + bool tryConvertIf(MachineBasicBlock *); + bool shouldConvertIf(); }; } // end anonymous namespace @@ -1100,50 +1160,8 @@ void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -class PredicatorStrategy : public SSAIfConv::PredicationStrategyBase { - const TargetInstrInfo *TII = nullptr; - -public: - PredicatorStrategy(const TargetInstrInfo *TII) : TII(TII) {} - - bool canPredicateInstr(const MachineInstr &I) override { - // Check that instruction is predicable - if (!TII->isPredicable(I)) { - LLVM_DEBUG(dbgs() << "Isn't predicable: " << I); - return false; - } - - // Check that instruction is not already predicated. - if (TII->isPredicated(I) && !TII->canPredicatePredicatedInstr(I)) { - LLVM_DEBUG(dbgs() << "Is already predicated: " << I); - return false; - } - return true; - } - - void predicateBlock(MachineBasicBlock *MBB, ArrayRef Cond, - bool Reverse) override { - SmallVector Condition(Cond); - if (Reverse) { - bool CanRevCond = !TII->reverseBranchCondition(Condition); - assert(CanRevCond && "Reversed predicate is not supported"); - (void)CanRevCond; - } - // Terminators don't need to be predicated as they will be removed. 
- for (MachineBasicBlock::iterator I = MBB->begin(), - E = MBB->getFirstTerminator(); - I != E; ++I) { - if (I->isDebugInstr()) - continue; - TII->PredicateInstruction(*I, Condition); - } - } - - ~PredicatorStrategy() override = default; -}; - /// Apply the target heuristic to decide if the transformation is profitable. -bool EarlyIfPredicator::shouldConvertIf(SSAIfConv &IfConv) { +bool EarlyIfPredicator::shouldConvertIf() { auto TrueProbability = MBPI->getEdgeProbability(IfConv.Head, IfConv.TBB); if (IfConv.isTriangle()) { MachineBasicBlock &IfBlock = @@ -1183,13 +1201,12 @@ bool EarlyIfPredicator::shouldConvertIf(SSAIfConv &IfConv) { /// Attempt repeated if-conversion on MBB, return true if successful. /// -bool EarlyIfPredicator::tryConvertIf(SSAIfConv &IfConv, - MachineBasicBlock *MBB) { +bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) { bool Changed = false; - while (IfConv.canConvertIf(MBB) && shouldConvertIf(IfConv)) { + while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) { // If-convert MBB and update analyses. SmallVector RemoveBlocks; - IfConv.convertIf(RemoveBlocks); + IfConv.convertIf(RemoveBlocks, /*Predicate*/ true); Changed = true; updateDomTree(DomTree, IfConv, RemoveBlocks); for (MachineBasicBlock *MBB : RemoveBlocks) @@ -1215,15 +1232,14 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) { MBPI = &getAnalysis().getMBPI(); bool Changed = false; - PredicatorStrategy Predicate(TII); - SSAIfConv IfConv(Predicate, MF); + IfConv.init(MF); // Visit blocks in dominator tree post-order. The post-order enables nested // if-conversion in a single pass. The tryConvertIf() function may erase // blocks, but only blocks dominated by the head block. This makes it safe to // update the dominator tree while the post-order iterator is still active. 
for (auto *DomNode : post_order(DomTree)) - if (tryConvertIf(IfConv, DomNode->getBlock())) + if (tryConvertIf(DomNode->getBlock())) Changed = true; return Changed; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 42d031310d5e027..ea22b4670d6f1f7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1657,7 +1657,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue, SignMask); - // If FABS is legal transform FCOPYSIGN(x, y) => sign(x) ? -FABS(x) : FABS(X) + // If FABS is legal transform + // FCOPYSIGN(x, y) => SignBit(y) ? -FABS(x) : FABS(x) EVT FloatVT = Mag.getValueType(); if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) && TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) { diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index e469c2ae52eb72e..3753509f9aa718d 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -34,9 +34,11 @@ #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Regex.h" @@ -4270,13 +4272,22 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst *RMW = Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); - if (PtrTy->getAddressSpace() != 3) { + unsigned AddrSpace = PtrTy->getAddressSpace(); + if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) { MDNode *EmptyMD = MDNode::get(F->getContext(), {}); RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD); if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy()) 
RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD); } + if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) { + MDBuilder MDB(F->getContext()); + MDNode *RangeNotPrivate = + MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS), + APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1)); + RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate); + } + if (IsVolatile) RMW->setVolatile(true); diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp index 74c9797d969f9d1..750918812852486 100644 --- a/llvm/lib/IR/ConstantFPRange.cpp +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -221,14 +221,57 @@ ConstantFPRange::makeAllowedFCmpRegion(FCmpInst::Predicate Pred, ConstantFPRange ConstantFPRange::makeSatisfyingFCmpRegion(FCmpInst::Predicate Pred, const ConstantFPRange &Other) { - // TODO - return getEmpty(Other.getSemantics()); + if (Other.isEmptySet()) + return getFull(Other.getSemantics()); + if (Other.containsNaN() && FCmpInst::isOrdered(Pred)) + return getEmpty(Other.getSemantics()); + if (Other.isNaNOnly() && FCmpInst::isUnordered(Pred)) + return getFull(Other.getSemantics()); + + switch (Pred) { + case FCmpInst::FCMP_TRUE: + return getFull(Other.getSemantics()); + case FCmpInst::FCMP_FALSE: + return getEmpty(Other.getSemantics()); + case FCmpInst::FCMP_ORD: + return getNonNaN(Other.getSemantics()); + case FCmpInst::FCMP_UNO: + return getNaNOnly(Other.getSemantics(), /*MayBeQNaN=*/true, + /*MayBeSNaN=*/true); + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_UEQ: + return setNaNField(Other.isSingleElement(/*ExcludesNaN=*/true) || + ((Other.classify() & ~fcNan) == fcZero) + ? 
extendZeroIfEqual(Other, Pred) + : getEmpty(Other.getSemantics()), + Pred); + case FCmpInst::FCMP_ONE: + case FCmpInst::FCMP_UNE: + return getEmpty(Other.getSemantics()); + case FCmpInst::FCMP_OLT: + case FCmpInst::FCMP_OLE: + case FCmpInst::FCMP_ULT: + case FCmpInst::FCMP_ULE: + return setNaNField( + extendZeroIfEqual(makeLessThan(Other.getLower(), Pred), Pred), Pred); + case FCmpInst::FCMP_OGT: + case FCmpInst::FCMP_OGE: + case FCmpInst::FCMP_UGT: + case FCmpInst::FCMP_UGE: + return setNaNField( + extendZeroIfEqual(makeGreaterThan(Other.getUpper(), Pred), Pred), Pred); + default: + llvm_unreachable("Unexpected predicate"); + } } std::optional ConstantFPRange::makeExactFCmpRegion(FCmpInst::Predicate Pred, const APFloat &Other) { - return std::nullopt; + if ((Pred == FCmpInst::FCMP_UNE || Pred == FCmpInst::FCMP_ONE) && + !Other.isNaN()) + return std::nullopt; + return makeSatisfyingFCmpRegion(Pred, ConstantFPRange(Other)); } bool ConstantFPRange::fcmp(FCmpInst::Predicate Pred, diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index a6f46da313e2132..57d9a03c9c22b83 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -81,8 +81,9 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { // Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts // This allows for other simplifications (although some of them // can only be handled by Analysis/ConstantFolding.cpp). - if (isa(V) || isa(V)) - return ConstantExpr::getBitCast(ConstantVector::get(V), DestPTy); + if (!isa(SrcTy)) + if (isa(V) || isa(V)) + return ConstantExpr::getBitCast(ConstantVector::get(V), DestPTy); return nullptr; } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 1cd5eb36c4ab695..b89c9ce46e7d614 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -492,6 +492,14 @@ class Verifier : public InstVisitor, VerifierSupport { /// Whether a metadata node is allowed to be, or contain, a DILocation. 
enum class AreDebugLocsAllowed { No, Yes }; + /// Metadata that should be treated as a range, with slightly different + /// requirements. + enum class RangeLikeMetadataKind { + Range, // MD_range + AbsoluteSymbol, // MD_absolute_symbol + NoaliasAddrspace // MD_noalias_addrspace + }; + // Verification methods... void visitGlobalValue(const GlobalValue &GV); void visitGlobalVariable(const GlobalVariable &GV); @@ -515,9 +523,10 @@ class Verifier : public InstVisitor, VerifierSupport { void visitModuleFlagCGProfileEntry(const MDOperand &MDO); void visitFunction(const Function &F); void visitBasicBlock(BasicBlock &BB); - void verifyRangeMetadata(const Value &V, const MDNode *Range, Type *Ty, - bool IsAbsoluteSymbol); + void verifyRangeLikeMetadata(const Value &V, const MDNode *Range, Type *Ty, + RangeLikeMetadataKind Kind); void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty); + void visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range, Type *Ty); void visitDereferenceableMetadata(Instruction &I, MDNode *MD); void visitProfMetadata(Instruction &I, MDNode *MD); void visitCallStackMetadata(MDNode *MD); @@ -760,8 +769,9 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) { // FIXME: Why is getMetadata on GlobalValue protected? if (const MDNode *AbsoluteSymbol = GO->getMetadata(LLVMContext::MD_absolute_symbol)) { - verifyRangeMetadata(*GO, AbsoluteSymbol, DL.getIntPtrType(GO->getType()), - true); + verifyRangeLikeMetadata(*GO, AbsoluteSymbol, + DL.getIntPtrType(GO->getType()), + RangeLikeMetadataKind::AbsoluteSymbol); } } @@ -4136,8 +4146,8 @@ static bool isContiguous(const ConstantRange &A, const ConstantRange &B) { /// Verify !range and !absolute_symbol metadata. These have the same /// restrictions, except !absolute_symbol allows the full set. 
-void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range, - Type *Ty, bool IsAbsoluteSymbol) { +void Verifier::verifyRangeLikeMetadata(const Value &I, const MDNode *Range, + Type *Ty, RangeLikeMetadataKind Kind) { unsigned NumOperands = Range->getNumOperands(); Check(NumOperands % 2 == 0, "Unfinished range!", Range); unsigned NumRanges = NumOperands / 2; @@ -4154,8 +4164,14 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range, Check(High->getType() == Low->getType(), "Range pair types must match!", &I); - Check(High->getType() == Ty->getScalarType(), - "Range types must match instruction type!", &I); + + if (Kind == RangeLikeMetadataKind::NoaliasAddrspace) { + Check(High->getType()->isIntegerTy(32), + "noalias.addrspace type must be i32!", &I); + } else { + Check(High->getType() == Ty->getScalarType(), + "Range types must match instruction type!", &I); + } APInt HighV = High->getValue(); APInt LowV = Low->getValue(); @@ -4166,7 +4182,9 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range, "The upper and lower limits cannot be the same value", &I); ConstantRange CurRange(LowV, HighV); - Check(!CurRange.isEmptySet() && (IsAbsoluteSymbol || !CurRange.isFullSet()), + Check(!CurRange.isEmptySet() && + (Kind == RangeLikeMetadataKind::AbsoluteSymbol || + !CurRange.isFullSet()), "Range must not be empty!", Range); if (i != 0) { Check(CurRange.intersectWith(LastRange).isEmptySet(), @@ -4194,7 +4212,15 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range, void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) { assert(Range && Range == I.getMetadata(LLVMContext::MD_range) && "precondition violation"); - verifyRangeMetadata(I, Range, Ty, false); + verifyRangeLikeMetadata(I, Range, Ty, RangeLikeMetadataKind::Range); +} + +void Verifier::visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range, + Type *Ty) { + assert(Range && Range == 
I.getMetadata(LLVMContext::MD_noalias_addrspace) && + "precondition violation"); + verifyRangeLikeMetadata(I, Range, Ty, + RangeLikeMetadataKind::NoaliasAddrspace); } void Verifier::checkAtomicMemAccessSize(Type *Ty, const Instruction *I) { @@ -5187,6 +5213,13 @@ void Verifier::visitInstruction(Instruction &I) { visitRangeMetadata(I, Range, I.getType()); } + if (MDNode *Range = I.getMetadata(LLVMContext::MD_noalias_addrspace)) { + Check(isa(I) || isa(I) || isa(I) || + isa(I) || isa(I), + "noalias.addrspace are only for memory operations!", &I); + visitNoaliasAddrspaceMetadata(I, Range, I.getType()); + } + if (I.hasMetadata(LLVMContext::MD_invariant_group)) { Check(isa(I) || isa(I), "invariant.group metadata is only for loads and stores", &I); diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index b5eb7953f23b09f..e1714b29399298a 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -578,10 +578,10 @@ LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel, CombinedModule->IsNewDbgInfoFormat = UseNewDbgInfoFormat; } -LTO::ThinLTOState::ThinLTOState(ThinBackend Backend) - : Backend(Backend), CombinedIndex(/*HaveGVs*/ false) { - if (!Backend) - this->Backend = +LTO::ThinLTOState::ThinLTOState(ThinBackend BackendParam) + : Backend(std::move(BackendParam)), CombinedIndex(/*HaveGVs*/ false) { + if (!Backend.isValid()) + Backend = createInProcessThinBackend(llvm::heavyweight_hardware_concurrency()); } @@ -1368,75 +1368,41 @@ SmallVector LTO::getRuntimeLibcallSymbols(const Triple &TT) { return LibcallSymbols; } -/// This class defines the interface to the ThinLTO backend. 
-class lto::ThinBackendProc { -protected: - const Config &Conf; - ModuleSummaryIndex &CombinedIndex; - const DenseMap &ModuleToDefinedGVSummaries; - lto::IndexWriteCallback OnWrite; - bool ShouldEmitImportsFiles; +Error ThinBackendProc::emitFiles( + const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath, + const std::string &NewModulePath) const { + ModuleToSummariesForIndexTy ModuleToSummariesForIndex; + GVSummaryPtrSet DeclarationSummaries; -public: - ThinBackendProc( - const Config &Conf, ModuleSummaryIndex &CombinedIndex, - const DenseMap &ModuleToDefinedGVSummaries, - lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles) - : Conf(Conf), CombinedIndex(CombinedIndex), - ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries), - OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles) {} + std::error_code EC; + gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries, + ImportList, ModuleToSummariesForIndex, + DeclarationSummaries); - virtual ~ThinBackendProc() = default; - virtual Error start( - unsigned Task, BitcodeModule BM, - const FunctionImporter::ImportMapTy &ImportList, - const FunctionImporter::ExportSetTy &ExportList, - const std::map &ResolvedODR, - MapVector &ModuleMap) = 0; - virtual Error wait() = 0; - virtual unsigned getThreadCount() = 0; - - // Write sharded indices and (optionally) imports to disk - Error emitFiles(const FunctionImporter::ImportMapTy &ImportList, - llvm::StringRef ModulePath, - const std::string &NewModulePath) { - ModuleToSummariesForIndexTy ModuleToSummariesForIndex; - GVSummaryPtrSet DeclarationSummaries; - - std::error_code EC; - gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries, - ImportList, ModuleToSummariesForIndex, - DeclarationSummaries); - - raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC, - sys::fs::OpenFlags::OF_None); - if (EC) - return errorCodeToError(EC); - - writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex, - 
&DeclarationSummaries); - - if (ShouldEmitImportsFiles) { - EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports", - ModuleToSummariesForIndex); - if (EC) - return errorCodeToError(EC); - } - return Error::success(); + raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC, + sys::fs::OpenFlags::OF_None); + if (EC) + return createFileError("cannot open " + NewModulePath + ".thinlto.bc", EC); + + writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex, + &DeclarationSummaries); + + if (ShouldEmitImportsFiles) { + Error ImportFilesError = EmitImportsFiles( + ModulePath, NewModulePath + ".imports", ModuleToSummariesForIndex); + if (ImportFilesError) + return ImportFilesError; } -}; + return Error::success(); +} namespace { class InProcessThinBackend : public ThinBackendProc { - DefaultThreadPool BackendThreadPool; AddStreamFn AddStream; FileCache Cache; DenseSet CfiFunctionDefs; DenseSet CfiFunctionDecls; - std::optional Err; - std::mutex ErrMu; - bool ShouldEmitIndexFiles; public: @@ -1447,9 +1413,9 @@ class InProcessThinBackend : public ThinBackendProc { AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite, bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles) : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries, - OnWrite, ShouldEmitImportsFiles), - BackendThreadPool(ThinLTOParallelism), AddStream(std::move(AddStream)), - Cache(std::move(Cache)), ShouldEmitIndexFiles(ShouldEmitIndexFiles) { + OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism), + AddStream(std::move(AddStream)), Cache(std::move(Cache)), + ShouldEmitIndexFiles(ShouldEmitIndexFiles) { for (auto &Name : CombinedIndex.cfiFunctionDefs()) CfiFunctionDefs.insert( GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name))); @@ -1546,18 +1512,6 @@ class InProcessThinBackend : public ThinBackendProc { OnWrite(std::string(ModulePath)); return Error::success(); } - - Error wait() override { - BackendThreadPool.wait(); - if (Err) - return std::move(*Err); - 
else - return Error::success(); } - - unsigned getThreadCount() override { - return BackendThreadPool.getMaxConcurrency(); - } }; } // end anonymous namespace @@ -1565,7 +1519,7 @@ ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism, lto::IndexWriteCallback OnWrite, bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles) { - return + auto Func = [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, const DenseMap &ModuleToDefinedGVSummaries, AddStreamFn AddStream, FileCache Cache) { @@ -1574,6 +1528,7 @@ ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism, AddStream, Cache, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles); }; + return ThinBackend(Func, Parallelism); } StringLiteral lto::getThinLTODefaultCPU(const Triple &TheTriple) { @@ -1618,12 +1573,13 @@ class WriteIndexesThinBackend : public ThinBackendProc { public: WriteIndexesThinBackend( const Config &Conf, ModuleSummaryIndex &CombinedIndex, + ThreadPoolStrategy ThinLTOParallelism, const DenseMap &ModuleToDefinedGVSummaries, std::string OldPrefix, std::string NewPrefix, std::string NativeObjectPrefix, bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite) : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries, - OnWrite, ShouldEmitImportsFiles), + OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism), OldPrefix(OldPrefix), NewPrefix(NewPrefix), NativeObjectPrefix(NativeObjectPrefix), LinkedObjectsFile(LinkedObjectsFile) {} @@ -1635,9 +1591,11 @@ class WriteIndexesThinBackend : public ThinBackendProc { const std::map &ResolvedODR, MapVector &ModuleMap) override { StringRef ModulePath = BM.getModuleIdentifier(); - std::string NewModulePath = - getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix); + // The contents of this file may be used as input to a native link, and must + // therefore contain the processed modules in a deterministic order that + // matches the order they are provided on the
command line. For that reason, + // we cannot include this in the asynchronously executed lambda below. if (LinkedObjectsFile) { std::string ObjectPrefix = NativeObjectPrefix.empty() ? NewPrefix : NativeObjectPrefix; @@ -1646,35 +1604,52 @@ class WriteIndexesThinBackend : public ThinBackendProc { *LinkedObjectsFile << LinkedObjectsFilePath << '\n'; } - if (auto E = emitFiles(ImportList, ModulePath, NewModulePath)) - return E; + BackendThreadPool.async( + [this](const StringRef ModulePath, + const FunctionImporter::ImportMapTy &ImportList, + const std::string &OldPrefix, const std::string &NewPrefix) { + std::string NewModulePath = + getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix); + auto E = emitFiles(ImportList, ModulePath, NewModulePath); + if (E) { + std::unique_lock L(ErrMu); + if (Err) + Err = joinErrors(std::move(*Err), std::move(E)); + else + Err = std::move(E); + return; + } + }, + ModulePath, ImportList, OldPrefix, NewPrefix); if (OnWrite) OnWrite(std::string(ModulePath)); return Error::success(); } - Error wait() override { return Error::success(); } - - // WriteIndexesThinBackend should always return 1 to prevent module - // re-ordering and avoid non-determinism in the final link. - unsigned getThreadCount() override { return 1; } + bool isSensitiveToInputOrder() override { + // The order which modules are written to LinkedObjectsFile should be + // deterministic and match the order they are passed on the command line. 
+ return true; + } }; } // end anonymous namespace ThinBackend lto::createWriteIndexesThinBackend( - std::string OldPrefix, std::string NewPrefix, - std::string NativeObjectPrefix, bool ShouldEmitImportsFiles, - raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite) { - return + ThreadPoolStrategy Parallelism, std::string OldPrefix, + std::string NewPrefix, std::string NativeObjectPrefix, + bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile, + IndexWriteCallback OnWrite) { + auto Func = [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, const DenseMap &ModuleToDefinedGVSummaries, AddStreamFn AddStream, FileCache Cache) { return std::make_unique( - Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, - NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles, + Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, + OldPrefix, NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles, LinkedObjectsFile, OnWrite); }; + return ThinBackend(Func, Parallelism); } Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, @@ -1854,7 +1829,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, ResolvedODR[Mod.first], ThinLTO.ModuleMap); }; - if (BackendProcess->getThreadCount() == 1) { + if (BackendProcess->getThreadCount() == 1 || + BackendProcess->isSensitiveToInputOrder()) { // Process the modules in the order they were provided on the // command-line. 
It is important for this codepath to be used for // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index 76268c950cf581f..8074f8690cc1ceb 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -837,9 +837,8 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName, ModuleIdentifier, ModuleToDefinedGVSummaries, ImportLists[ModuleIdentifier], ModuleToSummariesForIndex, DecSummaries); - std::error_code EC; - if ((EC = EmitImportsFiles(ModuleIdentifier, OutputName, - ModuleToSummariesForIndex))) + if (Error EC = EmitImportsFiles(ModuleIdentifier, OutputName, + ModuleToSummariesForIndex)) report_fatal_error(Twine("Failed to open ") + OutputName + " to save imports lists\n"); } diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index 58622e5ed254ea6..e3c85e179ac43b9 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -529,9 +529,7 @@ Error RawMemProfReader::mapRawProfileToRecords() { // first non-inline frame. for (size_t I = 0; /*Break out using the condition below*/; I++) { const Frame &F = idToFrame(Callstack[I]); - auto Result = - FunctionProfileData.insert({F.Function, IndexedMemProfRecord()}); - IndexedMemProfRecord &Record = Result.first->second; + IndexedMemProfRecord &Record = FunctionProfileData[F.Function]; Record.AllocSites.emplace_back(Callstack, CSId, MIB); if (!F.IsInlineFrame) @@ -543,8 +541,7 @@ Error RawMemProfReader::mapRawProfileToRecords() { for (const auto &[Id, Locs] : PerFunctionCallSites) { // Some functions may have only callsite data and no allocation data. Here // we insert a new entry for callsite data if we need to. 
- auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()}); - IndexedMemProfRecord &Record = Result.first->second; + IndexedMemProfRecord &Record = FunctionProfileData[Id]; for (LocationPtr Loc : Locs) { CallStackId CSId = hashCallStack(*Loc); CSIdToCallStack.insert({CSId, *Loc}); diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index c2014028ddadca5..29c1c97ddf3c524 100644 --- a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -13,6 +13,7 @@ #include "llvm/Support/TimeProfiler.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/JSON.h" #include "llvm/Support/Path.h" @@ -75,18 +76,18 @@ struct llvm::TimeTraceProfilerEntry { const std::string Name; TimeTraceMetadata Metadata; - const bool AsyncEvent = false; + const TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent; TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, - std::string &&Dt, bool Ae) + std::string &&Dt, TimeTraceEventType Et) : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Metadata(), - AsyncEvent(Ae) { + EventType(Et) { Metadata.Detail = std::move(Dt); } TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, - TimeTraceMetadata &&Mt, bool Ae) + TimeTraceMetadata &&Mt, TimeTraceEventType Et) : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), - Metadata(std::move(Mt)), AsyncEvent(Ae) {} + Metadata(std::move(Mt)), EventType(Et) {} // Calculate timings for FlameGraph. Cast time points to microsecond precision // rather than casting duration. This avoids truncation issues causing inner @@ -104,6 +105,25 @@ struct llvm::TimeTraceProfilerEntry { } }; +// Represents a currently open (in-progress) time trace entry. 
InstantEvents +// that happen during an open event are associated with the duration of this +// parent event and they are dropped if parent duration is shorter than +// the granularity. +struct InProgressEntry { + TimeTraceProfilerEntry Event; + std::vector InstantEvents; + + InProgressEntry(TimePointType S, TimePointType E, std::string N, + std::string Dt, TimeTraceEventType Et) + : Event(std::move(S), std::move(E), std::move(N), std::move(Dt), Et), + InstantEvents() {} + + InProgressEntry(TimePointType S, TimePointType E, std::string N, + TimeTraceMetadata Mt, TimeTraceEventType Et) + : Event(std::move(S), std::move(E), std::move(N), std::move(Mt), Et), + InstantEvents() {} +}; + struct llvm::TimeTraceProfiler { TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "", bool TimeTraceVerbose = false) @@ -114,27 +134,40 @@ struct llvm::TimeTraceProfiler { llvm::get_thread_name(ThreadName); } - TimeTraceProfilerEntry *begin(std::string Name, - llvm::function_ref Detail, - bool AsyncEvent = false) { - Stack.emplace_back(std::make_unique( + TimeTraceProfilerEntry * + begin(std::string Name, llvm::function_ref Detail, + TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent) { + assert(EventType != TimeTraceEventType::InstantEvent && + "Instant Events don't have begin and end."); + Stack.emplace_back(std::make_unique( ClockType::now(), TimePointType(), std::move(Name), Detail(), - AsyncEvent)); - return Stack.back().get(); + EventType)); + return &Stack.back()->Event; } TimeTraceProfilerEntry * begin(std::string Name, llvm::function_ref Metadata, - bool AsyncEvent = false) { - Stack.emplace_back(std::make_unique( + TimeTraceEventType EventType = TimeTraceEventType::CompleteEvent) { + assert(EventType != TimeTraceEventType::InstantEvent && + "Instant Events don't have begin and end."); + Stack.emplace_back(std::make_unique( ClockType::now(), TimePointType(), std::move(Name), Metadata(), - AsyncEvent)); - return Stack.back().get(); + 
EventType)); + return &Stack.back()->Event; + } + + void insert(std::string Name, llvm::function_ref Detail) { + if (Stack.empty()) + return; + + Stack.back().get()->InstantEvents.emplace_back(TimeTraceProfilerEntry( + ClockType::now(), TimePointType(), std::move(Name), Detail(), + TimeTraceEventType::InstantEvent)); } void end() { assert(!Stack.empty() && "Must call begin() first"); - end(*Stack.back()); + end(Stack.back()->Event); } void end(TimeTraceProfilerEntry &E) { @@ -144,9 +177,19 @@ struct llvm::TimeTraceProfiler { // Calculate duration at full precision for overall counts. DurationType Duration = E.End - E.Start; + const auto *Iter = + llvm::find_if(Stack, [&](const std::unique_ptr &Val) { + return &Val->Event == &E; + }); + assert(Iter != Stack.end() && "Event not in the Stack"); + // Only include sections longer or equal to TimeTraceGranularity msec. - if (duration_cast(Duration).count() >= TimeTraceGranularity) + if (duration_cast(Duration).count() >= TimeTraceGranularity) { Entries.emplace_back(E); + for (auto &IE : Iter->get()->InstantEvents) { + Entries.emplace_back(IE); + } + } // Track total time taken by each "name", but only the topmost levels of // them; e.g. if there's a template instantiation that instantiates other @@ -154,18 +197,15 @@ struct llvm::TimeTraceProfiler { // happens to be the ones that don't have any currently open entries above // itself. 
if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)), - [&](const std::unique_ptr &Val) { - return Val->Name == E.Name; + [&](const std::unique_ptr &Val) { + return Val->Event.Name == E.Name; })) { auto &CountAndTotal = CountAndTotalPerName[E.Name]; CountAndTotal.first++; CountAndTotal.second += Duration; }; - llvm::erase_if(Stack, - [&](const std::unique_ptr &Val) { - return Val.get() == &E; - }); + Stack.erase(Iter); } // Write events from this TimeTraceProfilerInstance and @@ -194,13 +234,17 @@ struct llvm::TimeTraceProfiler { J.attribute("pid", Pid); J.attribute("tid", int64_t(Tid)); J.attribute("ts", StartUs); - if (E.AsyncEvent) { + if (E.EventType == TimeTraceEventType::AsyncEvent) { J.attribute("cat", E.Name); J.attribute("ph", "b"); J.attribute("id", 0); - } else { + } else if (E.EventType == TimeTraceEventType::CompleteEvent) { J.attribute("ph", "X"); J.attribute("dur", DurUs); + } else { // instant event + assert(E.EventType == TimeTraceEventType::InstantEvent && + "InstantEvent expected"); + J.attribute("ph", "i"); } J.attribute("name", E.Name); if (!E.Metadata.isEmpty()) { @@ -215,7 +259,7 @@ struct llvm::TimeTraceProfiler { } }); - if (E.AsyncEvent) { + if (E.EventType == TimeTraceEventType::AsyncEvent) { J.object([&] { J.attribute("pid", Pid); J.attribute("tid", int64_t(Tid)); @@ -319,7 +363,7 @@ struct llvm::TimeTraceProfiler { J.objectEnd(); } - SmallVector, 16> Stack; + SmallVector, 16> Stack; SmallVector Entries; StringMap CountAndTotalPerName; // System clock time when the session was begun. 
@@ -406,7 +450,8 @@ TimeTraceProfilerEntry *llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) { if (TimeTraceProfilerInstance != nullptr) return TimeTraceProfilerInstance->begin( - std::string(Name), [&]() { return std::string(Detail); }, false); + std::string(Name), [&]() { return std::string(Detail); }, + TimeTraceEventType::CompleteEvent); return nullptr; } @@ -414,7 +459,8 @@ TimeTraceProfilerEntry * llvm::timeTraceProfilerBegin(StringRef Name, llvm::function_ref Detail) { if (TimeTraceProfilerInstance != nullptr) - return TimeTraceProfilerInstance->begin(std::string(Name), Detail, false); + return TimeTraceProfilerInstance->begin(std::string(Name), Detail, + TimeTraceEventType::CompleteEvent); return nullptr; } @@ -422,7 +468,8 @@ TimeTraceProfilerEntry * llvm::timeTraceProfilerBegin(StringRef Name, llvm::function_ref Metadata) { if (TimeTraceProfilerInstance != nullptr) - return TimeTraceProfilerInstance->begin(std::string(Name), Metadata, false); + return TimeTraceProfilerInstance->begin(std::string(Name), Metadata, + TimeTraceEventType::CompleteEvent); return nullptr; } @@ -430,10 +477,17 @@ TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name, StringRef Detail) { if (TimeTraceProfilerInstance != nullptr) return TimeTraceProfilerInstance->begin( - std::string(Name), [&]() { return std::string(Detail); }, true); + std::string(Name), [&]() { return std::string(Detail); }, + TimeTraceEventType::AsyncEvent); return nullptr; } +void llvm::timeTraceAddInstantEvent(StringRef Name, + llvm::function_ref Detail) { + if (TimeTraceProfilerInstance != nullptr) + TimeTraceProfilerInstance->insert(std::string(Name), Detail); +} + void llvm::timeTraceProfilerEnd() { if (TimeTraceProfilerInstance != nullptr) TimeTraceProfilerInstance->end(); diff --git a/llvm/lib/Target/AArch64/AArch64FMV.td b/llvm/lib/Target/AArch64/AArch64FMV.td index 8266507379f3b1b..c063d3f38ba19c5 100644 --- a/llvm/lib/Target/AArch64/AArch64FMV.td +++ 
b/llvm/lib/Target/AArch64/AArch64FMV.td @@ -81,8 +81,7 @@ def : FMVExtension<"sme", "FEAT_SME", "+sme,+bf16", 430>; def : FMVExtension<"sme-f64f64", "FEAT_SME_F64", "+sme,+sme-f64f64,+bf16", 560>; def : FMVExtension<"sme-i16i64", "FEAT_SME_I64", "+sme,+sme-i16i64,+bf16", 570>; def : FMVExtension<"sme2", "FEAT_SME2", "+sme2,+sme,+bf16", 580>; -def : FMVExtension<"ssbs", "FEAT_SSBS", "", 490>; -def : FMVExtension<"ssbs2", "FEAT_SSBS2", "+ssbs", 500>; +def : FMVExtension<"ssbs", "FEAT_SSBS2", "+ssbs", 490>; def : FMVExtension<"sve", "FEAT_SVE", "+sve,+fullfp16,+fp-armv8,+neon", 310>; def : FMVExtension<"sve-bf16", "FEAT_SVE_BF16", "+sve,+bf16,+fullfp16,+fp-armv8,+neon", 320>; def : FMVExtension<"sve-ebf16", "FEAT_SVE_EBF16", "+sve,+bf16,+fullfp16,+fp-armv8,+neon", 330>; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 460ac79991e2334..288fd3639e5eb7b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -268,6 +268,7 @@ static bool isMergePassthruOpcode(unsigned Opc) { case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU: case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU: case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU: + case AArch64ISD::FCVTX_MERGE_PASSTHRU: case AArch64ISD::FCVTZU_MERGE_PASSTHRU: case AArch64ISD::FCVTZS_MERGE_PASSTHRU: case AArch64ISD::FSQRT_MERGE_PASSTHRU: @@ -2652,6 +2653,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FCVTX_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU) @@ -4416,6 +4418,19 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, // Set the quiet bit. 
if (!DAG.isKnownNeverSNaN(SrcVal)) NaN = DAG.getNode(ISD::OR, DL, I32, Narrow, ImmV(0x400000)); + } else if (SrcVT == MVT::nxv2f64 && + (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) { + // Round to float without introducing rounding errors and try again. + SDValue Pg = getPredicateForVector(DAG, DL, MVT::nxv2f32); + Narrow = DAG.getNode(AArch64ISD::FCVTX_MERGE_PASSTHRU, DL, MVT::nxv2f32, + Pg, SrcVal, DAG.getUNDEF(MVT::nxv2f32)); + + SmallVector NewOps; + if (IsStrict) + NewOps.push_back(Op.getOperand(0)); + NewOps.push_back(Narrow); + NewOps.push_back(Op.getOperand(IsStrict ? 2 : 1)); + return DAG.getNode(Op.getOpcode(), DL, VT, NewOps, Op->getFlags()); } else return SDValue(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 480bf60360bf550..1bae7562f459a52 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -158,6 +158,7 @@ enum NodeType : unsigned { FP_EXTEND_MERGE_PASSTHRU, UINT_TO_FP_MERGE_PASSTHRU, SINT_TO_FP_MERGE_PASSTHRU, + FCVTX_MERGE_PASSTHRU, FCVTZU_MERGE_PASSTHRU, FCVTZS_MERGE_PASSTHRU, SIGN_EXTEND_INREG_MERGE_PASSTHRU, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index e78c67abeca30e3..2a857234c7d745b 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -357,6 +357,7 @@ def AArch64fcvtr_mt : SDNode<"AArch64ISD::FP_ROUND_MERGE_PASSTHRU", SDT_AArch64 def AArch64fcvte_mt : SDNode<"AArch64ISD::FP_EXTEND_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64ucvtf_mt : SDNode<"AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; +def AArch64fcvtx_mt : SDNode<"AArch64ISD::FCVTX_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>; def 
AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>; @@ -3788,7 +3789,7 @@ let Predicates = [HasSVE2orSME, UseExperimentalZeroingPseudos] in { let Predicates = [HasSVE2orSME] in { // SVE2 floating-point convert precision defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding_top<"fcvtxnt", "int_aarch64_sve_fcvtxnt">; - defm FCVTX_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtx", "int_aarch64_sve_fcvtx">; + defm FCVTX_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtx", "int_aarch64_sve_fcvtx", AArch64fcvtx_mt>; defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt", "int_aarch64_sve_fcvtnt">; defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt", "int_aarch64_sve_fcvtlt">; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index accfb49c6fbe3ab..9856415361e50d7 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -188,10 +188,14 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible())); } - /// Returns true if the target has access to either the full range of SVE instructions, - /// or the streaming-compatible subset of SVE instructions. + /// Returns true if the target has access to the streaming-compatible subset + /// of SVE instructions. + bool isStreamingSVEAvailable() const { return hasSME() && isStreaming(); } + + /// Returns true if the target has access to either the full range of SVE + /// instructions, or the streaming-compatible subset of SVE instructions. 
bool isSVEorStreamingSVEAvailable() const { - return hasSVE() || (hasSME() && isStreaming()); + return hasSVE() || isStreamingSVEAvailable(); } unsigned getMinVectorRegisterBitWidth() const { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 3d313ca00f1259f..a69894839361bc1 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -422,7 +422,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return std::pair(0, LLT::scalar(VecTy.getSizeInBits())); }) .customIf(IsPtrVecPred) - .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0); + .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0) + .scalarizeIf(scalarOrEltWiderThan(0, 64), 0); StoreActions .customIf([=](const LegalityQuery &Query) { @@ -463,7 +464,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return std::pair(0, LLT::scalar(VecTy.getSizeInBits())); }) .customIf(IsPtrVecPred) - .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0); + .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0) + .scalarizeIf(scalarOrEltWiderThan(0, 64), 0); getActionDefinitionsBuilder(G_INDEXED_STORE) // Idx 0 == Ptr, Idx 1 == Val diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 4a720270df91203..f655526fa81cfe4 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -3061,9 +3061,11 @@ multiclass sve2_fp_un_pred_zeroing_hsd { def : SVE_1_Op_PassthruZero_Pat(NAME # _D_ZERO)>; } -multiclass sve2_fp_convert_down_odd_rounding { +multiclass sve2_fp_convert_down_odd_rounding { def _DtoS : sve_fp_2op_p_zd<0b0001010, asm, ZPR64, ZPR32, ElementSizeD>; + def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast(NAME # _DtoS)>; + def : SVE_1_Op_Passthru_Pat(NAME # _DtoS)>; } //===----------------------------------------------------------------------===// 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0d153df5c3977c5..0c2ae382f53a197 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -8903,7 +8903,7 @@ bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI, uint16_t Opcode = MI.getOpcode(); return IsNullOrVectorRegister && - (isSGPRSpill(Opcode) || + (isSGPRSpill(Opcode) || isWWMRegSpillOpcode(Opcode) || (!MI.isTerminator() && Opcode != AMDGPU::COPY && MI.modifiesRegister(AMDGPU::EXEC, &RI))); } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index a8cdbbd8a3c5be5..de9cbe403ab6182 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2678,12 +2678,18 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, Register TmpReg; + // FIXME: Scavenger should figure out that the result register is + // available. Also should do this for the v_add case. + if (OtherOp.isReg() && OtherOp.getReg() != DstOp.getReg()) + TmpReg = DstOp.getReg(); + if (FrameReg && !ST.enableFlatScratch()) { // FIXME: In the common case where the add does not also read its result // (i.e. this isn't a reg += fi), it's not finding the dest reg as // available. 
- TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, MI, - false, 0); + if (!TmpReg) + TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, + MI, false, 0); BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32)) .addDef(TmpReg, RegState::Renamable) .addReg(FrameReg) @@ -2711,7 +2717,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (!TmpReg && MaterializedReg == FrameReg) { TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, - MI, false, 0); + MI, /*RestoreAfter=*/false, 0, + /*AllowSpill=*/false); DstReg = TmpReg; } diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 0bcd03c7fad38dd..1e84a7216013da0 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -570,6 +570,7 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { break; case Intrinsic::dx_step: Result = expandStepIntrinsic(Orig); + break; case Intrinsic::dx_radians: Result = expandRadiansIntrinsic(Orig); break; diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 1a59f04b2140426..be714b5c87895ab 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -13,7 +13,17 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsDirectX.h" -bool llvm::DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( +using namespace llvm; + +bool DirectXTTIImpl::isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, + unsigned ScalarOpdIdx) { + switch (ID) { + default: + return false; + } +} + +bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( Intrinsic::ID ID) const { switch (ID) { case Intrinsic::dx_frac: diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h index 
48414549f834957..30b57ed97d6370b 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h @@ -35,6 +35,8 @@ class DirectXTTIImpl : public BasicTTIImplBase { TLI(ST->getTargetLowering()) {} unsigned getMinVectorRegisterBitWidth() const { return 32; } bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const; + bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, + unsigned ScalarOpdIdx); }; } // namespace llvm diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 78d6d7587160a3c..e68674e830436f7 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -41,6 +41,7 @@ #include "llvm/TargetParser/RISCVISAInfo.h" #include +#include using namespace llvm; @@ -719,6 +720,8 @@ struct RISCVOperand final : public MCParsedAsmOperand { bool isUImm16() const { return IsUImm<16>(); } bool isUImm20() const { return IsUImm<20>(); } bool isUImm32() const { return IsUImm<32>(); } + bool isUImm48() const { return IsUImm<48>(); } + bool isUImm64() const { return IsUImm<64>(); } bool isUImm8GE32() const { int64_t Imm; @@ -3166,8 +3169,8 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) { StringRef Format; SMLoc ErrorLoc = Parser.getTok().getLoc(); if (Parser.parseIdentifier(Format)) { - // Try parsing .insn [length], value - int64_t Length = 0; + // Try parsing .insn [ length , ] value + std::optional Length; int64_t Value = 0; if (Parser.parseIntToken( Value, "expected instruction format or an integer constant")) @@ -3176,25 +3179,66 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) { Length = Value; if (Parser.parseIntToken(Value, "expected an integer constant")) return true; + + if (*Length == 0 || (*Length % 2) != 0) + return Error(ErrorLoc, + "instruction lengths must be a non-zero multiple of two"); + + // TODO: Support Instructions > 64 bits. 
+ if (*Length > 8) + return Error(ErrorLoc, + "instruction lengths over 64 bits are not supported"); + } + + // We only derive a length from the encoding for 16- and 32-bit + // instructions, as the encodings for longer instructions are not frozen in + // the spec. + int64_t EncodingDerivedLength = ((Value & 0b11) == 0b11) ? 4 : 2; + + if (Length) { + // Only check the length against the encoding if the length is present and + // could match + if ((*Length <= 4) && (*Length != EncodingDerivedLength)) + return Error(ErrorLoc, + "instruction length does not match the encoding"); + + if (!isUIntN(*Length * 8, Value)) + return Error(ErrorLoc, "encoding value does not fit into instruction"); + } else { + if (!isUIntN(EncodingDerivedLength * 8, Value)) + return Error(ErrorLoc, "encoding value does not fit into instruction"); } - // TODO: Add support for long instructions - int64_t RealLength = (Value & 3) == 3 ? 4 : 2; - if (!isUIntN(RealLength * 8, Value)) - return Error(ErrorLoc, "invalid operand for instruction"); - if (RealLength == 2 && !AllowC) + if (!AllowC && (EncodingDerivedLength == 2)) return Error(ErrorLoc, "compressed instructions are not allowed"); - if (Length != 0 && Length != RealLength) - return Error(ErrorLoc, "instruction length mismatch"); if (getParser().parseEOL("invalid operand for instruction")) { getParser().eatToEndOfStatement(); return true; } - emitToStreamer(getStreamer(), MCInstBuilder(RealLength == 2 ? RISCV::Insn16 - : RISCV::Insn32) - .addImm(Value)); + unsigned Opcode; + if (Length) { + switch (*Length) { + case 2: + Opcode = RISCV::Insn16; + break; + case 4: + Opcode = RISCV::Insn32; + break; + case 6: + Opcode = RISCV::Insn48; + break; + case 8: + Opcode = RISCV::Insn64; + break; + default: + llvm_unreachable("Error should have already been emitted"); + } + } else + Opcode = (EncodingDerivedLength == 2) ? 
RISCV::Insn16 : RISCV::Insn32; + + emitToStreamer(getStreamer(), MCInstBuilder(Opcode).addImm(Value)); return false; } diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index cfe8644b892298f..7f14e98b5bc6aba 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -800,6 +800,7 @@ void RISCVInstructionSelector::preISelLower(MachineInstr &MI, replacePtrWithInt(MI.getOperand(1), MIB); MI.setDesc(TII.get(TargetOpcode::G_AND)); MRI->setType(DstReg, sXLen); + break; } } } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index cf3ea3e4ea2131b..d82f78498418da7 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -309,6 +309,8 @@ enum OperandType : unsigned { OPERAND_UIMM12, OPERAND_UIMM16, OPERAND_UIMM32, + OPERAND_UIMM48, + OPERAND_UIMM64, OPERAND_ZERO, OPERAND_SIMM5, OPERAND_SIMM5_PLUS1, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index eb21498d15e86c4..66970ed37f27247 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -355,6 +355,21 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, support::endian::write(CB, Bits, llvm::endianness::little); break; } + case 6: { + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI) & 0xffff'ffff'ffffu; + SmallVector Encoding; + support::endian::write(Encoding, Bits, llvm::endianness::little); + assert(Encoding[6] == 0 && Encoding[7] == 0 && + "Unexpected encoding for 48-bit instruction"); + Encoding.truncate(6); + CB.append(Encoding); + break; + } + case 8: { + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); + support::endian::write(CB, Bits, 
llvm::endianness::little); + break; + } } ++MCNumEmitted; // Keep track of the # of mi's emitted. diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index fcea18f81b39013..013c26c72bfd554 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -266,6 +266,22 @@ class RVInst pattern, InstFormat format> + : RVInstCommon { + field bits<48> Inst; + field bits<48> SoftFail = 0; + let Size = 6; +} + +class RVInst64 pattern, InstFormat format> + : RVInstCommon { + field bits<64> Inst; + field bits<64> SoftFail = 0; + let Size = 8; +} + // Pseudo instructions class Pseudo pattern, string opcodestr = "", string argstr = ""> : RVInst { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 0f16b2a9739dd3a..5d329dceac65190 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -241,6 +241,8 @@ def uimm7 : RISCVUImmOp<7>; def uimm8 : RISCVUImmOp<8>; def uimm16 : RISCVUImmOp<16>; def uimm32 : RISCVUImmOp<32>; +def uimm48 : RISCVUImmOp<48>; +def uimm64 : RISCVUImmOp<64>; def simm12 : RISCVSImmLeafOp<12> { let MCOperandPredicate = [{ int64_t Imm; @@ -1155,6 +1157,16 @@ def Insn32 : RVInst<(outs), (ins uimm32:$value), "", "", [], InstFormatOther> { let Inst{31-0} = value; let AsmString = ".insn 0x4, $value"; } +def Insn48 : RVInst48<(outs), (ins uimm48:$value), "", "", [], InstFormatOther> { + bits<48> value; + let Inst{47-0} = value; + let AsmString = ".insn 0x6, $value"; +} +def Insn64 : RVInst64<(outs), (ins uimm64:$value), "", "", [], InstFormatOther> { + bits<64> value; + let Inst{63-0} = value; + let AsmString = ".insn 0x8, $value"; +} } // Use InstAliases to match these so that we can combine the insn and format diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 1e5321a9ace41b1..a61461681f79ede 100644 --- 
a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -343,49 +343,6 @@ RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind) { /*AddressSpace=*/0, CostKind); } -InstructionCost -RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef Mask, - TTI::TargetCostKind CostKind) { - if (!isa(Tp)) - return InstructionCost::getInvalid(); - std::pair LT = getTypeLegalizationCost(Tp); - if (LT.second.getScalarSizeInBits() == 1) - return InstructionCost::getInvalid(); - // Try to guess SubTp. - for (unsigned SubVecSize = 1, E = Mask.size(); SubVecSize < E; - SubVecSize <<= 1) { - if (E % SubVecSize != 0) - continue; - SmallVector RepeatedPattern(createSequentialMask(0, SubVecSize, 0)); - bool Skip = false; - for (unsigned I = 0; I != E; I += SubVecSize) - if (!Mask.slice(I, SubVecSize).equals(RepeatedPattern)) { - Skip = true; - break; - } - if (Skip) - continue; - InstructionCost Cost = 0; - unsigned NumSlides = Log2_32(E / SubVecSize); - // The cost of extraction from a subvector is 0 if the index is 0. - for (unsigned I = 0; I != NumSlides; ++I) { - unsigned InsertIndex = SubVecSize * (1 << I); - FixedVectorType *SubTp = FixedVectorType::get( - cast(Tp)->getElementType(), InsertIndex); - FixedVectorType *DesTp = - FixedVectorType::getDoubleElementsVectorType(SubTp); - std::pair DesLT = getTypeLegalizationCost(DesTp); - // Add the cost of whole vector register move because the destination - // vector register group for vslideup cannot overlap the source. 
- Cost += DesLT.first * TLI->getLMULCost(DesLT.second); - Cost += getShuffleCost(TTI::SK_InsertSubvector, DesTp, {}, CostKind, - InsertIndex, SubTp); - } - return Cost; - } - return InstructionCost::getInvalid(); -} - static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST, LLVMContext &C) { assert((DataVT.getScalarSizeInBits() != 8 || @@ -437,10 +394,6 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, LT.second, CostKind); } } - if (InstructionCost Cost = - isMultipleInsertSubvector(Tp, Mask, CostKind); - Cost.isValid()) - return Cost; } // vrgather + cost of generating the mask constant. // We model this for an unknown mask with a single vrgather. diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 9d6317baabc6585..65bbd9055085570 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -55,12 +55,6 @@ class RISCVTTIImpl : public BasicTTIImplBase { /// type. InstructionCost getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind); - - /// Return the cost if a shufflevector can be consist of multiple vslideup. - /// Otherwise, return InstructionCost::getInvalid(). 
- InstructionCost isMultipleInsertSubvector(VectorType *Tp, ArrayRef Mask, - TTI::TargetCostKind CostKind); - public: explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F) : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp index f95f0d2988be289..d3e323efaee91b8 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp @@ -4,6 +4,7 @@ #include "SPIRVGlobalRegistry.h" #include "SPIRVRegisterInfo.h" #include "SPIRVTargetMachine.h" +#include "SPIRVUtils.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -104,6 +105,7 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { int64_t DwarfVersion = 0; int64_t DebugInfoVersion = 0; SmallPtrSet BasicTypes; + SmallPtrSet PointerDerivedTypes; // Searching through the Module metadata to find nescessary // information like DwarfVersion or SourceLanguage { @@ -146,8 +148,21 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { DILocalVariable *LocalVariable = DVR.getVariable(); if (auto *BasicType = - dyn_cast(LocalVariable->getType())) + dyn_cast(LocalVariable->getType())) { BasicTypes.insert(BasicType); + } else if (auto *DerivedType = + dyn_cast(LocalVariable->getType())) { + if (DerivedType->getTag() == dwarf::DW_TAG_pointer_type) { + PointerDerivedTypes.insert(DerivedType); + // DIBasicType can be unreachable from DbgRecord and only + // pointed on from other DI types + // DerivedType->getBaseType is null when pointer + // is representing a void type + if (DerivedType->getBaseType()) + BasicTypes.insert( + cast(DerivedType->getBaseType())); + } + } } } } @@ -206,6 +221,7 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { const Register DwarfVersionReg = 
GR->buildConstantInt(DwarfVersion, MIRBuilder, I32Ty, false); + const Register DebugInfoVersionReg = GR->buildConstantInt(DebugInfoVersion, MIRBuilder, I32Ty, false); @@ -237,7 +253,6 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { break; case dwarf::DW_LANG_Zig: SpirvSourceLanguage = SourceLanguage::Zig; - break; } const Register SourceLanguageReg = @@ -255,6 +270,11 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { const Register I32ZeroReg = GR->buildConstantInt(0, MIRBuilder, I32Ty, false); + // We need to store pairs because further instructions reference + // the DIBasicTypes and size will be always small so there isn't + // need for any kind of map + SmallVector, 12> + BasicTypeRegPairs; for (auto *BasicType : BasicTypes) { const Register BasicTypeStrReg = EmitOpString(BasicType->getName()); @@ -288,11 +308,46 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { const Register AttributeEncodingReg = GR->buildConstantInt(AttributeEncoding, MIRBuilder, I32Ty, false); - [[maybe_unused]] const Register BasicTypeReg = EmitDIInstruction(SPIRV::NonSemanticExtInst::DebugTypeBasic, {BasicTypeStrReg, ConstIntBitwidthReg, AttributeEncodingReg, I32ZeroReg}); + BasicTypeRegPairs.emplace_back(BasicType, BasicTypeReg); + } + + if (PointerDerivedTypes.size()) { + for (const auto *PointerDerivedType : PointerDerivedTypes) { + + assert(PointerDerivedType->getDWARFAddressSpace().has_value()); + const Register StorageClassReg = GR->buildConstantInt( + addressSpaceToStorageClass( + PointerDerivedType->getDWARFAddressSpace().value(), + *TM->getSubtargetImpl()), + MIRBuilder, I32Ty, false); + + // If the Pointer is representing a void type it's getBaseType + // is a nullptr + const auto *MaybeNestedBasicType = + cast_or_null(PointerDerivedType->getBaseType()); + if (MaybeNestedBasicType) { + for (const auto &BasicTypeRegPair : BasicTypeRegPairs) { + const auto &[DefinedBasicType, BasicTypeReg] = BasicTypeRegPair; + if 
(DefinedBasicType == MaybeNestedBasicType) { + [[maybe_unused]] + const Register DebugPointerTypeReg = EmitDIInstruction( + SPIRV::NonSemanticExtInst::DebugTypePointer, + {BasicTypeReg, StorageClassReg, I32ZeroReg}); + } + } + } else { + const Register DebugInfoNoneReg = + EmitDIInstruction(SPIRV::NonSemanticExtInst::DebugInfoNone, {}); + [[maybe_unused]] + const Register DebugPointerTypeReg = EmitDIInstruction( + SPIRV::NonSemanticExtInst::DebugTypePointer, + {DebugInfoNoneReg, StorageClassReg, I32ZeroReg}); + } + } } } return true; diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index 6af4dd3a3497188..70cdd73e73f668a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -436,7 +436,7 @@ void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) { namespace NS = SPIRV::NonSemanticExtInst; static constexpr int64_t GlobalNonSemanticDITy[] = { NS::DebugSource, NS::DebugCompilationUnit, NS::DebugInfoNone, - NS::DebugTypeBasic}; + NS::DebugTypeBasic, NS::DebugTypePointer}; bool IsGlobalDI = false; for (unsigned Idx = 0; Idx < std::size(GlobalNonSemanticDITy); ++Idx) IsGlobalDI |= Ins.getImm() == GlobalNonSemanticDITy[Idx]; diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp index effc2e65223cad5..cc4338f6a59fa66 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp @@ -59,7 +59,7 @@ void WebAssemblyAsmTypeCheck::localDecl( } void WebAssemblyAsmTypeCheck::dumpTypeStack(Twine Msg) { - LLVM_DEBUG({ dbgs() << Msg << getTypesString(Stack, 0) << "\n"; }); + LLVM_DEBUG({ dbgs() << Msg << getTypesString(Stack) << "\n"; }); } bool WebAssemblyAsmTypeCheck::typeError(SMLoc ErrorLoc, const Twine &Msg) { @@ -116,8 +116,15 @@ std::string 
WebAssemblyAsmTypeCheck::getTypesString(ArrayRef Types, return SS.str(); } +std::string +WebAssemblyAsmTypeCheck::getTypesString(ArrayRef Types, + size_t StartPos) { + return getTypesString(valTypesToStackTypes(Types), StartPos); +} + SmallVector -WebAssemblyAsmTypeCheck::valTypeToStackType(ArrayRef ValTypes) { +WebAssemblyAsmTypeCheck::valTypesToStackTypes( + ArrayRef ValTypes) { SmallVector Types(ValTypes.size()); std::transform(ValTypes.begin(), ValTypes.end(), Types.begin(), [](wasm::ValType Val) -> StackType { return Val; }); @@ -127,7 +134,7 @@ WebAssemblyAsmTypeCheck::valTypeToStackType(ArrayRef ValTypes) { bool WebAssemblyAsmTypeCheck::checkTypes(SMLoc ErrorLoc, ArrayRef ValTypes, bool ExactMatch) { - return checkTypes(ErrorLoc, valTypeToStackType(ValTypes), ExactMatch); + return checkTypes(ErrorLoc, valTypesToStackTypes(ValTypes), ExactMatch); } bool WebAssemblyAsmTypeCheck::checkTypes(SMLoc ErrorLoc, @@ -178,14 +185,14 @@ bool WebAssemblyAsmTypeCheck::checkTypes(SMLoc ErrorLoc, : std::max((int)BlockStackStartPos, (int)Stack.size() - (int)Types.size()); return typeError(ErrorLoc, "type mismatch, expected " + - getTypesString(Types, 0) + " but got " + + getTypesString(Types) + " but got " + getTypesString(Stack, StackStartPos)); } bool WebAssemblyAsmTypeCheck::popTypes(SMLoc ErrorLoc, ArrayRef ValTypes, bool ExactMatch) { - return popTypes(ErrorLoc, valTypeToStackType(ValTypes), ExactMatch); + return popTypes(ErrorLoc, valTypesToStackTypes(ValTypes), ExactMatch); } bool WebAssemblyAsmTypeCheck::popTypes(SMLoc ErrorLoc, @@ -215,7 +222,7 @@ bool WebAssemblyAsmTypeCheck::popAnyType(SMLoc ErrorLoc) { } void WebAssemblyAsmTypeCheck::pushTypes(ArrayRef ValTypes) { - Stack.append(valTypeToStackType(ValTypes)); + Stack.append(valTypesToStackTypes(ValTypes)); } bool WebAssemblyAsmTypeCheck::getLocal(SMLoc ErrorLoc, const MCOperand &LocalOp, @@ -322,6 +329,68 @@ bool WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc, bool ExactMatch) { return checkTypes(ErrorLoc, 
FuncInfo.Sig.Returns, ExactMatch); } +// Unlike checkTypes() family, this just compare the equivalence of the two +// ValType vectors +static bool compareTypes(ArrayRef TypesA, + ArrayRef TypesB) { + if (TypesA.size() != TypesB.size()) + return true; + for (size_t I = 0, E = TypesA.size(); I < E; I++) + if (TypesA[I] != TypesB[I]) + return true; + return false; +} + +bool WebAssemblyAsmTypeCheck::checkTryTable(SMLoc ErrorLoc, + const MCInst &Inst) { + bool Error = false; + unsigned OpIdx = 1; // OpIdx 0 is the block type + int64_t NumCatches = Inst.getOperand(OpIdx++).getImm(); + for (int64_t I = 0; I < NumCatches; I++) { + int64_t Opcode = Inst.getOperand(OpIdx++).getImm(); + std::string ErrorMsgBase = + "try_table: catch index " + std::to_string(I) + ": "; + + const wasm::WasmSignature *Sig = nullptr; + SmallVector SentTypes; + if (Opcode == wasm::WASM_OPCODE_CATCH || + Opcode == wasm::WASM_OPCODE_CATCH_REF) { + if (!getSignature(ErrorLoc, Inst.getOperand(OpIdx++), + wasm::WASM_SYMBOL_TYPE_TAG, Sig)) + SentTypes.insert(SentTypes.end(), Sig->Params.begin(), + Sig->Params.end()); + else + Error = true; + } + if (Opcode == wasm::WASM_OPCODE_CATCH_REF || + Opcode == wasm::WASM_OPCODE_CATCH_ALL_REF) { + SentTypes.push_back(wasm::ValType::EXNREF); + } + + unsigned Level = Inst.getOperand(OpIdx++).getImm(); + if (Level < BlockInfoStack.size()) { + const auto &DestBlockInfo = + BlockInfoStack[BlockInfoStack.size() - Level - 1]; + ArrayRef DestTypes; + if (DestBlockInfo.IsLoop) + DestTypes = DestBlockInfo.Sig.Params; + else + DestTypes = DestBlockInfo.Sig.Returns; + if (compareTypes(SentTypes, DestTypes)) { + std::string ErrorMsg = + ErrorMsgBase + "type mismatch, catch tag type is " + + getTypesString(SentTypes) + ", but destination's type is " + + getTypesString(DestTypes); + Error |= typeError(ErrorLoc, ErrorMsg); + } + } else { + Error = typeError(ErrorLoc, ErrorMsgBase + "invalid depth " + + std::to_string(Level)); + } + } + return Error; +} + bool 
WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, OperandVector &Operands) { auto Opc = Inst.getOpcode(); @@ -460,10 +529,13 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, return popType(ErrorLoc, Any{}); } - if (Name == "block" || Name == "loop" || Name == "if" || Name == "try") { + if (Name == "block" || Name == "loop" || Name == "if" || Name == "try" || + Name == "try_table") { bool Error = Name == "if" && popType(ErrorLoc, wasm::ValType::I32); // Pop block input parameters and check their types are correct Error |= popTypes(ErrorLoc, LastSig.Params); + if (Name == "try_table") + Error |= checkTryTable(ErrorLoc, Inst); // Push a new block info BlockInfoStack.push_back({LastSig, Stack.size(), Name == "loop"}); // Push back block input parameters @@ -472,8 +544,8 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, } if (Name == "end_block" || Name == "end_loop" || Name == "end_if" || - Name == "end_try" || Name == "delegate" || Name == "else" || - Name == "catch" || Name == "catch_all") { + Name == "end_try" || Name == "delegate" || Name == "end_try_table" || + Name == "else" || Name == "catch" || Name == "catch_all") { assert(!BlockInfoStack.empty()); // Check if the types on the stack match with the block return type const auto &LastBlockInfo = BlockInfoStack.back(); @@ -586,6 +658,12 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, return Error; } + if (Name == "throw_ref") { + bool Error = popType(ErrorLoc, wasm::ValType::EXNREF); + pushType(Polymorphic{}); + return Error; + } + // The current instruction is a stack instruction which doesn't have // explicit operands that indicate push/pop types, so we get those from // the register version of the same instruction. 
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h index 596fb27bce94e65..e6fddf98060265f 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h @@ -65,9 +65,11 @@ class WebAssemblyAsmTypeCheck final { void pushTypes(ArrayRef Types); void pushType(StackType Type) { Stack.push_back(Type); } bool match(StackType TypeA, StackType TypeB); - std::string getTypesString(ArrayRef Types, size_t StartPos); + std::string getTypesString(ArrayRef Types, + size_t StartPos = 0); + std::string getTypesString(ArrayRef Types, size_t StartPos = 0); SmallVector - valTypeToStackType(ArrayRef ValTypes); + valTypesToStackTypes(ArrayRef ValTypes); void dumpTypeStack(Twine Msg); bool typeError(SMLoc ErrorLoc, const Twine &Msg); @@ -80,6 +82,7 @@ class WebAssemblyAsmTypeCheck final { bool getTable(SMLoc ErrorLoc, const MCOperand &TableOp, wasm::ValType &Type); bool getSignature(SMLoc ErrorLoc, const MCOperand &SigOp, wasm::WasmSymbolType Type, const wasm::WasmSignature *&Sig); + bool checkTryTable(SMLoc ErrorLoc, const MCInst &Inst); public: WebAssemblyAsmTypeCheck(MCAsmParser &Parser, const MCInstrInfo &MII, diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 6e175f25c4882c2..6e1f4b6052bda8b 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1050,6 +1050,7 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family, CPU = "k8"; break; case 16: + case 18: CPU = "amdfam10"; *Type = X86::AMDFAM10H; // "amdfam10" switch (Model) { diff --git a/llvm/lib/Transforms/Coroutines/ABI.h b/llvm/lib/Transforms/Coroutines/ABI.h index c94bf7d356b650a..7fa835e84ca3362 100644 --- a/llvm/lib/Transforms/Coroutines/ABI.h +++ b/llvm/lib/Transforms/Coroutines/ABI.h @@ -41,7 +41,7 @@ class LLVM_LIBRARY_VISIBILITY BaseABI { virtual void init() = 0; // Allocate 
the coroutine frame and do spill/reload as needed. - virtual void buildCoroutineFrame(); + virtual void buildCoroutineFrame(bool OptimizeFrame); // Perform the function splitting according to the ABI. virtual void splitCoroutine(Function &F, coro::Shape &Shape, diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 021b1f7a4156b9f..91530503a7e1ed0 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -234,7 +234,7 @@ class FrameTypeBuilder { /// Side Effects: Because We sort the allocas, the order of allocas in the /// frame may be different with the order in the source code. void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData, - coro::Shape &Shape); + coro::Shape &Shape, bool OptimizeFrame); /// Add a field to this structure. [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment, @@ -336,7 +336,8 @@ void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) { void FrameTypeBuilder::addFieldForAllocas(const Function &F, FrameDataInfo &FrameData, - coro::Shape &Shape) { + coro::Shape &Shape, + bool OptimizeFrame) { using AllocaSetType = SmallVector; SmallVector NonOverlapedAllocas; @@ -350,7 +351,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F, } }); - if (!Shape.OptimizeFrame) { + if (!OptimizeFrame) { for (const auto &A : FrameData.Allocas) { AllocaInst *Alloca = A.Alloca; NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); @@ -860,7 +861,8 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape, // ... spills ... 
// }; static StructType *buildFrameType(Function &F, coro::Shape &Shape, - FrameDataInfo &FrameData) { + FrameDataInfo &FrameData, + bool OptimizeFrame) { LLVMContext &C = F.getContext(); const DataLayout &DL = F.getDataLayout(); StructType *FrameTy = [&] { @@ -905,7 +907,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, // Because multiple allocas may own the same field slot, // we add allocas to field here. - B.addFieldForAllocas(F, FrameData, Shape); + B.addFieldForAllocas(F, FrameData, Shape, OptimizeFrame); // Add PromiseAlloca to Allocas list so that // 1. updateLayoutIndex could update its index after // `performOptimizedStructLayout` @@ -2056,7 +2058,7 @@ void coro::normalizeCoroutine(Function &F, coro::Shape &Shape, rewritePHIs(F); } -void coro::BaseABI::buildCoroutineFrame() { +void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) { SuspendCrossingInfo Checker(F, Shape.CoroSuspends, Shape.CoroEnds); doRematerializations(F, Checker, IsMaterializable); @@ -2087,7 +2089,7 @@ void coro::BaseABI::buildCoroutineFrame() { // Build frame FrameDataInfo FrameData(Spills, Allocas); - Shape.FrameTy = buildFrameType(F, Shape, FrameData); + Shape.FrameTy = buildFrameType(F, Shape, FrameData, OptimizeFrame); Shape.FramePtr = Shape.CoroBegin; // For now, this works for C++ programs only. buildFrameDebugInfo(F, Shape, FrameData); diff --git a/llvm/lib/Transforms/Coroutines/CoroShape.h b/llvm/lib/Transforms/Coroutines/CoroShape.h index f4fb4baa6df3147..7daa03beb2542a2 100644 --- a/llvm/lib/Transforms/Coroutines/CoroShape.h +++ b/llvm/lib/Transforms/Coroutines/CoroShape.h @@ -112,9 +112,6 @@ struct LLVM_LIBRARY_VISIBILITY Shape { Value *FramePtr = nullptr; BasicBlock *AllocaSpillBlock = nullptr; - /// This would only be true if optimization are enabled. 
- bool OptimizeFrame; - struct SwitchLoweringStorage { SwitchInst *ResumeSwitch; AllocaInst *PromiseAlloca; @@ -265,8 +262,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape { void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const; Shape() = default; - explicit Shape(Function &F, bool OptimizeFrame = false) - : OptimizeFrame(OptimizeFrame) { + explicit Shape(Function &F) { SmallVector CoroFrames; SmallVector UnusedCoroSaves; diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 4fbda077129fa56..9aed4f6522a3f70 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -2053,7 +2053,8 @@ void coro::SwitchABI::splitCoroutine(Function &F, coro::Shape &Shape, } static void doSplitCoroutine(Function &F, SmallVectorImpl &Clones, - coro::BaseABI &ABI, TargetTransformInfo &TTI) { + coro::BaseABI &ABI, TargetTransformInfo &TTI, + bool OptimizeFrame) { PrettyStackTraceFunction prettyStackTrace(F); auto &Shape = ABI.Shape; @@ -2064,7 +2065,7 @@ static void doSplitCoroutine(Function &F, SmallVectorImpl &Clones, simplifySuspendPoints(Shape); normalizeCoroutine(F, Shape, TTI); - ABI.buildCoroutineFrame(); + ABI.buildCoroutineFrame(OptimizeFrame); replaceFrameSizeAndAlignment(Shape); bool isNoSuspendCoroutine = Shape.CoroSuspends.empty(); @@ -2273,7 +2274,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, // unreachable blocks before collecting intrinsics into Shape. 
removeUnreachableBlocks(F); - coro::Shape Shape(F, OptimizeFrame); + coro::Shape Shape(F); if (!Shape.CoroBegin) continue; @@ -2283,7 +2284,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, SmallVector Clones; auto &TTI = FAM.getResult(F); - doSplitCoroutine(F, Clones, *ABI, TTI); + doSplitCoroutine(F, Clones, *ABI, TTI, OptimizeFrame); CurrentSCC = &updateCallGraphAfterCoroutineSplit( *N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM); diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index 261731fd565b027..fee27f72f208b0e 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1553,20 +1553,21 @@ void llvm::gatherImportedSummariesForModule( } /// Emit the files \p ModulePath will import from into \p OutputFilename. -std::error_code llvm::EmitImportsFiles( +Error llvm::EmitImportsFiles( StringRef ModulePath, StringRef OutputFilename, const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex) { std::error_code EC; raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_Text); if (EC) - return EC; + return createFileError("cannot open " + OutputFilename, + errorCodeToError(EC)); for (const auto &ILI : ModuleToSummariesForIndex) // The ModuleToSummariesForIndex map includes an entry for the current // Module (needed for writing out the index files). We don't want to // include it in the imports file, however, so filter it out. 
if (ILI.first != ModulePath) ImportsOS << ILI.first << "\n"; - return std::error_code(); + return Error::success(); } bool llvm::convertToDeclaration(GlobalValue &GV) { diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index ee86e2e6c9751ef..72728c0f839e5d4 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -745,7 +745,8 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { Tys[0] = VS->RemainderTy; for (unsigned J = 0; J != NumArgs; ++J) { - if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) { + if (isVectorIntrinsicWithScalarOpAtArg(ID, J) || + TTI->isTargetIntrinsicWithScalarOpAtArg(ID, J)) { ScalarCallOps.push_back(ScalarOperands[J]); } else { ScalarCallOps.push_back(Scattered[J][I]); diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index eeff4a9f6a8bae8..887c2089c5a5207 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_component_library(LLVMVectorize SandboxVectorizer/DependencyGraph.cpp SandboxVectorizer/Passes/BottomUpVec.cpp SandboxVectorizer/SandboxVectorizer.cpp + SandboxVectorizer/SeedCollector.cpp SLPVectorizer.cpp Vectorize.cpp VectorCombine.cpp diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp new file mode 100644 index 000000000000000..00a7dc3fcec93e8 --- /dev/null +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp @@ -0,0 +1,64 @@ +//===- SeedCollector.cpp -0000000-----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Type.h" +#include "llvm/SandboxIR/Instruction.h" +#include "llvm/SandboxIR/Utils.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; +namespace llvm::sandboxir { + +MutableArrayRef SeedBundle::getSlice(unsigned StartIdx, + unsigned MaxVecRegBits, + bool ForcePowerOf2) { + // Use uint32_t here for compatibility with IsPowerOf2_32 + + // BitCount tracks the size of the working slice. From that we can tell + // when the working slice's size is a power-of-two and when it exceeds + // the legal size in MaxVecBits. + uint32_t BitCount = 0; + uint32_t NumElements = 0; + // Tracks the most recent slice where NumElements gave a power-of-2 BitCount + uint32_t NumElementsPowerOfTwo = 0; + uint32_t BitCountPowerOfTwo = 0; + // Can't start a slice with a used instruction. + assert(!isUsed(StartIdx) && "Expected unused at StartIdx"); + for (auto S : make_range(Seeds.begin() + StartIdx, Seeds.end())) { + uint32_t InstBits = Utils::getNumBits(S); + // Stop if this instruction is used, or if adding it puts the slice over + // the limit. 
+ if (isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits) + break; + NumElements++; + BitCount += InstBits; + if (ForcePowerOf2 && isPowerOf2_32(BitCount)) { + NumElementsPowerOfTwo = NumElements; + BitCountPowerOfTwo = BitCount; + } + } + if (ForcePowerOf2) { + NumElements = NumElementsPowerOfTwo; + BitCount = BitCountPowerOfTwo; + } + + assert((!ForcePowerOf2 || isPowerOf2_32(BitCount)) && + "Must be a power of two"); + // Return any non-empty slice + if (NumElements > 1) + return MutableArrayRef(&Seeds[StartIdx], NumElements); + else + return {}; +} + +} // namespace llvm::sandboxir diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 9b1294f2c42822b..f31db3c17e5cab2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2611,7 +2611,11 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { assert((!BlockInMask || !Group->isReverse()) && "Reversed masked interleave-group not supported."); - Value *Index; + VPValue *Addr = getAddr(); + Value *ResAddr = State.get(Addr, VPLane(0)); + if (auto *I = dyn_cast(ResAddr)) + State.setDebugLocFrom(I->getDebugLoc()); + // If the group is reverse, adjust the index to refer to the last vector lane // instead of the first. We adjust the index from the first vector lane, // rather than directly getting the pointer for lane VF - 1, because the @@ -2619,24 +2623,17 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { if (Group->isReverse()) { Value *RuntimeVF = getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF); - Index = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1)); + Value *Index = + State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1)); Index = State.Builder.CreateMul(Index, State.Builder.getInt32(Group->getFactor())); Index = State.Builder.CreateNeg(Index); - } else { - // TODO: Drop redundant 0-index GEP as follow-up. 
- Index = State.Builder.getInt32(0); - } - VPValue *Addr = getAddr(); - Value *ResAddr = State.get(Addr, VPLane(0)); - if (auto *I = dyn_cast(ResAddr)) - State.setDebugLocFrom(I->getDebugLoc()); - - bool InBounds = false; - if (auto *gep = dyn_cast(ResAddr->stripPointerCasts())) - InBounds = gep->isInBounds(); - ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds); + bool InBounds = false; + if (auto *Gep = dyn_cast(ResAddr->stripPointerCasts())) + InBounds = Gep->isInBounds(); + ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds); + } State.setDebugLocFrom(Instr->getDebugLoc()); Value *PoisonVec = PoisonValue::get(VecTy); diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll deleted file mode 100644 index 47a2af92aee9509..000000000000000 --- a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll +++ /dev/null @@ -1,18 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s - -define void @test() { -; CHECK-LABEL: 'test' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; -entry: - %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> - %1 = shufflevector <4 x i16> poison, <4 x i16> 
poison, <16 x i32> - %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> - %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> - ret void -} diff --git a/llvm/test/Assembler/allockind-missing.ll b/llvm/test/Assembler/allockind-missing.ll index e8672fe9db032aa..4fb3f658a911901 100644 --- a/llvm/test/Assembler/allockind-missing.ll +++ b/llvm/test/Assembler/allockind-missing.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s +; CHECK: [[FILE]]:[[@LINE+1]]:30: error: expected allockind value declare void @f0() allockind() -; CHECK: :[[#@LINE-1]]:30: error: expected allockind value diff --git a/llvm/test/Assembler/invalid-inttype.ll b/llvm/test/Assembler/invalid-inttype.ll index df3175540fe829d..c8aa7c66b79e4dd 100644 --- a/llvm/test/Assembler/invalid-inttype.ll +++ b/llvm/test/Assembler/invalid-inttype.ll @@ -1,5 +1,5 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s ; i8388609 is the smallest integer type that can't be represented in LLVM IR +; CHECK: [[FILE]]:[[@LINE+1]]:21: error: bitwidth for integer type out of range! 
@i2 = common global i8388609 0, align 4 -; CHECK: expected type diff --git a/llvm/test/Assembler/invalid-landingpad.ll b/llvm/test/Assembler/invalid-landingpad.ll index 306e94312dd552c..805d3dbaa4d2066 100644 --- a/llvm/test/Assembler/invalid-landingpad.ll +++ b/llvm/test/Assembler/invalid-landingpad.ll @@ -1,7 +1,7 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s -; CHECK: clause argument must be a constant define void @test(i32 %in) personality ptr null { +; CHECK: [[FILE]]:[[@LINE+1]]:24: error: 'filter' clause has an invalid type landingpad {} filter i32 %in } diff --git a/llvm/test/Assembler/invalid-name.ll b/llvm/test/Assembler/invalid-name.ll index 0681ea528bf4301..74133e60df54d59 100644 Binary files a/llvm/test/Assembler/invalid-name.ll and b/llvm/test/Assembler/invalid-name.ll differ diff --git a/llvm/test/Assembler/invalid-name2.ll b/llvm/test/Assembler/invalid-name2.ll index 384dee6777d8054..8a848798a54cafe 100644 Binary files a/llvm/test/Assembler/invalid-name2.ll and b/llvm/test/Assembler/invalid-name2.ll differ diff --git a/llvm/test/Assembler/noalias-addrspace-md.ll b/llvm/test/Assembler/noalias-addrspace-md.ll new file mode 100644 index 000000000000000..62fabad86f683a6 --- /dev/null +++ b/llvm/test/Assembler/noalias-addrspace-md.ll @@ -0,0 +1,110 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +define i64 @atomicrmw_noalias_addrspace__0_1(ptr %ptr, i64 %val) { +; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_1( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META0:![0-9]+]] +; CHECK-NEXT: ret i64 [[RET]] +; + %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !0 + ret i64 %ret +} + +define i64 @atomicrmw_noalias_addrspace__0_2(ptr %ptr, i64 %val) { +; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_2( +; CHECK-SAME: ptr 
[[PTR:%.*]], i64 [[VAL:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META1:![0-9]+]] +; CHECK-NEXT: ret i64 [[RET]] +; + %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !1 + ret i64 %ret +} + +define i64 @atomicrmw_noalias_addrspace__1_3(ptr %ptr, i64 %val) { +; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__1_3( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META2:![0-9]+]] +; CHECK-NEXT: ret i64 [[RET]] +; + %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !2 + ret i64 %ret +} + +define i64 @atomicrmw_noalias_addrspace__multiple_ranges(ptr %ptr, i64 %val) { +; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__multiple_ranges( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META3:![0-9]+]] +; CHECK-NEXT: ret i64 [[RET]] +; + %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3 + ret i64 %ret +} + +define i64 @load_noalias_addrspace__5_6(ptr %ptr) { +; CHECK-LABEL: define i64 @load_noalias_addrspace__5_6( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = load i64, ptr [[PTR]], align 4, !noalias.addrspace [[META4:![0-9]+]] +; CHECK-NEXT: ret i64 [[RET]] +; + %ret = load i64, ptr %ptr, align 4, !noalias.addrspace !4 + ret i64 %ret +} + +define void @store_noalias_addrspace__5_6(ptr %ptr, i64 %val) { +; CHECK-LABEL: define void @store_noalias_addrspace__5_6( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) { +; CHECK-NEXT: store i64 [[VAL]], ptr [[PTR]], align 4, !noalias.addrspace [[META4]] +; CHECK-NEXT: ret void +; + store i64 %val, ptr %ptr, align 4, !noalias.addrspace !4 + ret void +} + +define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(ptr %ptr, i64 %val0, i64 %val1) { 
+; CHECK-LABEL: define { i64, i1 } @cmpxchg_noalias_addrspace__5_6( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL0:%.*]], i64 [[VAL1:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL0]], i64 [[VAL1]] monotonic monotonic, align 8, !noalias.addrspace [[META4]] +; CHECK-NEXT: ret { i64, i1 } [[RET]] +; + %ret = cmpxchg ptr %ptr, i64 %val0, i64 %val1 monotonic monotonic, align 8, !noalias.addrspace !4 + ret { i64, i1 } %ret +} + +declare void @foo() + +define void @call_noalias_addrspace__5_6(ptr %ptr) { +; CHECK-LABEL: define void @call_noalias_addrspace__5_6( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: call void @foo(), !noalias.addrspace [[META4]] +; CHECK-NEXT: ret void +; + call void @foo(), !noalias.addrspace !4 + ret void +} + +define void @call_memcpy_intrinsic_addrspace__5_6(ptr %dst, ptr %src, i64 %size) { +; CHECK-LABEL: define void @call_memcpy_intrinsic_addrspace__5_6( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) { +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false), !noalias.addrspace [[META4]] +; CHECK-NEXT: ret void +; + call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false), !noalias.addrspace !4 + ret void +} + +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0 + +attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } + +!0 = !{i32 0, i32 1} +!1 = !{i32 0, i32 2} +!2 = !{i32 1, i32 3} +!3 = !{i32 4, i32 6, i32 10, i32 55} +!4 = !{i32 5, i32 6} +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 0, i32 2} +; CHECK: [[META2]] = !{i32 1, i32 3} +; CHECK: [[META3]] = !{i32 4, i32 6, i32 10, i32 55} +; CHECK: [[META4]] = !{i32 5, i32 6} +;. 
diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll index d642372799f56bc..87ca1e3a617ed91 100644 --- a/llvm/test/Bitcode/amdgcn-atomic.ll +++ b/llvm/test/Bitcode/amdgcn-atomic.ll @@ -2,10 +2,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -26,10 +26,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr } define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 
@llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr ; Test some invalid ordering handling define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 -1, i32 0, i1 true) - ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 true) - ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 1, i32 0, i1 false) - ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !1 %result3 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 2, i32 0, i1 true) - ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) 
%ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result4 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 3, i32 0, i1 false) - ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 4, i1 true) - ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result6 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 5, i1 false) - ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 6, i1 true) - ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result8 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 7, i1 false) - ; CHECK:= atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK:= atomicrmw volatile udec_wrap ptr %ptr0, i32 42 
syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result9 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 8, i1 true) - ; CHECK:= atomicrmw volatile udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK:= atomicrmw volatile udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result10 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 3, i32 0, i1 true) ret void } define void @immarg_violations(ptr %ptr0, i32 %val32, i1 %val1) { - ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 %val32, i32 0, i1 false) -; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !0 +; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 2, i32 %val32, i1 false) - ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 2, i32 0, i1 %val1) ret void } @@ -304,7 +304,7 @@ declare <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr, <2 x i16>) define <2 x i16> @upgrade_amdgcn_flat_atomic_fadd_v2bf16_p0(ptr %ptr, <2 x i16> %data) { ; CHECK: [[BC0:%.+]] = 
bitcast <2 x i16> %data to <2 x bfloat> - ; CHECK-NEXT: [[ATOMIC:%.+]] = atomicrmw fadd ptr %ptr, <2 x bfloat> [[BC0]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + ; CHECK-NEXT: [[ATOMIC:%.+]] = atomicrmw fadd ptr %ptr, <2 x bfloat> [[BC0]] syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} ; CHECK-NEXT: [[BC1:%.+]] = bitcast <2 x bfloat> [[ATOMIC]] to <2 x i16> ; CHECK-NEXT: ret <2 x i16> [[BC1]] %result = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data) @@ -325,7 +325,7 @@ define <2 x i16> @upgrade_amdgcn_global_atomic_fadd_v2bf16_p1(ptr addrspace(1) % declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr nocapture, <2 x half>) #0 define <2 x half> @upgrade_amdgcn_flat_atomic_fadd_v2f16_p0_v2f16(ptr %ptr, <2 x half> %data) { - ; CHECK: %{{.+}} = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} + ; CHECK: %{{.+}} = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !{{[0-9]+}}, !amdgpu.no.fine.grained.memory !{{[0-9]+$}} %result = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data) ret <2 x half> %result } @@ -341,7 +341,7 @@ define <2 x half> @upgrade_amdgcn_global_atomic_fadd_v2f16_p1_v2f16(ptr addrspac declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr nocapture, float) #0 define float @upgrade_amdgcn_flat_atomic_fadd_f32_p0_f32(ptr %ptr, float %data) { - ; CHECK: %{{.+}} = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} + ; CHECK: %{{.+}} = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !{{[0-9]+}}, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} %result = call float 
@llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) ret float %result } @@ -355,3 +355,6 @@ define float @upgrade_amdgcn_global_atomic_fadd_f32_p1_f32(ptr addrspace(1) %ptr } attributes #0 = { argmemonly nounwind willreturn } + +; CHECK: !0 = !{i32 5, i32 6} +; CHECK: !1 = !{} diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index 2d568e858c36b74..29a9082173ea51e 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -8,7 +8,6 @@ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_signed_v4f16_v4i50 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_signed_v8f16_v8i19 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_signed_v8f16_v8i50 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_signed_v8f16_v8i128 ; ; Float to signed 32-bit -- Vector size variation @@ -4496,183 +4495,269 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { } define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { -; CHECK-LABEL: test_signed_v8f16_v8i128: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #192 -; CHECK-NEXT: str d10, [sp, #64] // 8-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: stp x28, x27, [sp, #112] // 16-byte Folded Spill -; CHECK-NEXT: stp x26, x25, [sp, #128] // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #144] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 192 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w23, -40 -; CHECK-NEXT: .cfi_offset w24, -48 
-; CHECK-NEXT: .cfi_offset w25, -56 -; CHECK-NEXT: .cfi_offset w26, -64 -; CHECK-NEXT: .cfi_offset w27, -72 -; CHECK-NEXT: .cfi_offset w28, -80 -; CHECK-NEXT: .cfi_offset w30, -88 -; CHECK-NEXT: .cfi_offset w29, -96 -; CHECK-NEXT: .cfi_offset b8, -104 -; CHECK-NEXT: .cfi_offset b9, -112 -; CHECK-NEXT: .cfi_offset b10, -128 -; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov x19, x8 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: movi v9.2s, #255, lsl #24 -; CHECK-NEXT: mov w8, #2130706431 // =0x7effffff -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov s10, w8 -; CHECK-NEXT: mov x22, #-9223372036854775808 // =0x8000000000000000 -; CHECK-NEXT: mov x23, #9223372036854775807 // =0x7fffffffffffffff -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x22, x1, lt -; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: csel x9, x23, x9, gt -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: str x8, [sp, #72] // 8-byte Folded Spill -; CHECK-NEXT: csel x8, xzr, x9, vs -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x22, x1, lt -; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: csel x9, x23, x9, gt -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x10, xzr, x8, vs -; CHECK-NEXT: csel x8, xzr, x9, vs -; CHECK-NEXT: stp x8, x10, [sp, #8] // 16-byte Folded Spill -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s9 -; 
CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x22, x1, lt -; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: csel x9, x23, x9, gt -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: csel x8, xzr, x9, vs -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x22, x1, lt -; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: csel x9, x23, x9, gt -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x28, xzr, x8, vs -; CHECK-NEXT: csel x29, xzr, x9, vs -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x22, x1, lt -; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: csel x9, x23, x9, gt -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x20, xzr, x8, vs -; CHECK-NEXT: csel x21, xzr, x9, vs -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x22, x1, lt -; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: csel x9, x23, x9, gt -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x24, xzr, x8, vs -; CHECK-NEXT: csel x25, xzr, x9, vs -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: ldr q0, [sp, #48] // 
16-byte Folded Reload -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x22, x1, lt -; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: csel x9, x23, x9, gt -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x26, xzr, x8, vs -; CHECK-NEXT: csel x27, xzr, x9, vs -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: stp x26, x27, [x19, #32] -; CHECK-NEXT: stp x24, x25, [x19, #16] -; CHECK-NEXT: stp x20, x21, [x19] -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x22, x1, lt -; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: stp x28, x29, [x19, #112] -; CHECK-NEXT: csel x9, x23, x9, gt -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: stp x8, x9, [x19, #48] -; CHECK-NEXT: ldr x8, [sp] // 8-byte Folded Reload -; CHECK-NEXT: str x8, [x19, #104] -; CHECK-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: str x8, [x19, #96] -; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: str x8, [x19, #88] -; CHECK-NEXT: ldr x8, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: str x8, [x19, #80] -; CHECK-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload -; CHECK-NEXT: str x8, [x19, #72] -; CHECK-NEXT: ldr x8, [sp, #72] // 8-byte Folded Reload -; CHECK-NEXT: str x8, [x19, #64] -; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload -; CHECK-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload -; CHECK-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload -; CHECK-NEXT: ldp x26, x25, [sp, #128] // 16-byte Folded Reload -; CHECK-NEXT: ldp x28, x27, [sp, #112] // 16-byte Folded Reload -; CHECK-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #192 -; CHECK-NEXT: 
ret +; CHECK-SD-LABEL: test_signed_v8f16_v8i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub sp, sp, #192 +; CHECK-SD-NEXT: str d10, [sp, #64] // 8-byte Folded Spill +; CHECK-SD-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x28, x27, [sp, #112] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x26, x25, [sp, #128] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x24, x23, [sp, #144] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 192 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 +; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w23, -40 +; CHECK-SD-NEXT: .cfi_offset w24, -48 +; CHECK-SD-NEXT: .cfi_offset w25, -56 +; CHECK-SD-NEXT: .cfi_offset w26, -64 +; CHECK-SD-NEXT: .cfi_offset w27, -72 +; CHECK-SD-NEXT: .cfi_offset w28, -80 +; CHECK-SD-NEXT: .cfi_offset w30, -88 +; CHECK-SD-NEXT: .cfi_offset w29, -96 +; CHECK-SD-NEXT: .cfi_offset b8, -104 +; CHECK-SD-NEXT: .cfi_offset b9, -112 +; CHECK-SD-NEXT: .cfi_offset b10, -128 +; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: mov x19, x8 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: movi v9.2s, #255, lsl #24 +; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: fmov s10, w8 +; CHECK-SD-NEXT: mov x22, #-9223372036854775808 // =0x8000000000000000 +; CHECK-SD-NEXT: mov x23, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-SD-NEXT: mov h0, v0.h[1] +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: csel x8, xzr, x0, lt +; 
CHECK-SD-NEXT: csel x9, x22, x1, lt +; CHECK-SD-NEXT: fcmp s8, s10 +; CHECK-SD-NEXT: csel x9, x23, x9, gt +; CHECK-SD-NEXT: csinv x8, x8, xzr, le +; CHECK-SD-NEXT: fcmp s8, s8 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csel x8, xzr, x8, vs +; CHECK-SD-NEXT: str x8, [sp, #72] // 8-byte Folded Spill +; CHECK-SD-NEXT: csel x8, xzr, x9, vs +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov h0, v0.h[2] +; CHECK-SD-NEXT: csel x8, xzr, x0, lt +; CHECK-SD-NEXT: csel x9, x22, x1, lt +; CHECK-SD-NEXT: fcmp s8, s10 +; CHECK-SD-NEXT: csel x9, x23, x9, gt +; CHECK-SD-NEXT: csinv x8, x8, xzr, le +; CHECK-SD-NEXT: fcmp s8, s8 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csel x10, xzr, x8, vs +; CHECK-SD-NEXT: csel x8, xzr, x9, vs +; CHECK-SD-NEXT: stp x8, x10, [sp, #8] // 16-byte Folded Spill +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov h0, v0.h[3] +; CHECK-SD-NEXT: csel x8, xzr, x0, lt +; CHECK-SD-NEXT: csel x9, x22, x1, lt +; CHECK-SD-NEXT: fcmp s8, s10 +; CHECK-SD-NEXT: csel x9, x23, x9, gt +; CHECK-SD-NEXT: csinv x8, x8, xzr, le +; CHECK-SD-NEXT: fcmp s8, s8 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csel x8, xzr, x8, vs +; CHECK-SD-NEXT: str x8, [sp, #32] // 8-byte Folded Spill +; CHECK-SD-NEXT: csel x8, xzr, x9, vs +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: str x8, [sp] // 8-byte Folded Spill +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: csel x8, xzr, x0, lt +; CHECK-SD-NEXT: csel x9, x22, x1, lt +; CHECK-SD-NEXT: fcmp s8, s10 +; CHECK-SD-NEXT: csel x9, x23, x9, gt +; CHECK-SD-NEXT: csinv x8, x8, xzr, le +; CHECK-SD-NEXT: fcmp s8, s8 +; CHECK-SD-NEXT: fcvt 
s8, h0 +; CHECK-SD-NEXT: csel x28, xzr, x8, vs +; CHECK-SD-NEXT: csel x29, xzr, x9, vs +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov h0, v0.h[1] +; CHECK-SD-NEXT: csel x8, xzr, x0, lt +; CHECK-SD-NEXT: csel x9, x22, x1, lt +; CHECK-SD-NEXT: fcmp s8, s10 +; CHECK-SD-NEXT: csel x9, x23, x9, gt +; CHECK-SD-NEXT: csinv x8, x8, xzr, le +; CHECK-SD-NEXT: fcmp s8, s8 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csel x20, xzr, x8, vs +; CHECK-SD-NEXT: csel x21, xzr, x9, vs +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov h0, v0.h[2] +; CHECK-SD-NEXT: csel x8, xzr, x0, lt +; CHECK-SD-NEXT: csel x9, x22, x1, lt +; CHECK-SD-NEXT: fcmp s8, s10 +; CHECK-SD-NEXT: csel x9, x23, x9, gt +; CHECK-SD-NEXT: csinv x8, x8, xzr, le +; CHECK-SD-NEXT: fcmp s8, s8 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csel x24, xzr, x8, vs +; CHECK-SD-NEXT: csel x25, xzr, x9, vs +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov h0, v0.h[3] +; CHECK-SD-NEXT: csel x8, xzr, x0, lt +; CHECK-SD-NEXT: csel x9, x22, x1, lt +; CHECK-SD-NEXT: fcmp s8, s10 +; CHECK-SD-NEXT: csel x9, x23, x9, gt +; CHECK-SD-NEXT: csinv x8, x8, xzr, le +; CHECK-SD-NEXT: fcmp s8, s8 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csel x26, xzr, x8, vs +; CHECK-SD-NEXT: csel x27, xzr, x9, vs +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixsfti +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: stp x26, x27, [x19, #32] +; CHECK-SD-NEXT: stp x24, x25, [x19, #16] +; CHECK-SD-NEXT: stp x20, x21, [x19] +; CHECK-SD-NEXT: csel x8, xzr, x0, lt +; CHECK-SD-NEXT: csel x9, x22, x1, lt +; CHECK-SD-NEXT: fcmp s8, s10 +; CHECK-SD-NEXT: stp x28, x29, [x19, #112] +; 
CHECK-SD-NEXT: csel x9, x23, x9, gt +; CHECK-SD-NEXT: csinv x8, x8, xzr, le +; CHECK-SD-NEXT: fcmp s8, s8 +; CHECK-SD-NEXT: csel x9, xzr, x9, vs +; CHECK-SD-NEXT: csel x8, xzr, x8, vs +; CHECK-SD-NEXT: stp x8, x9, [x19, #48] +; CHECK-SD-NEXT: ldr x8, [sp] // 8-byte Folded Reload +; CHECK-SD-NEXT: str x8, [x19, #104] +; CHECK-SD-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload +; CHECK-SD-NEXT: str x8, [x19, #96] +; CHECK-SD-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: str x8, [x19, #88] +; CHECK-SD-NEXT: ldr x8, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: str x8, [x19, #80] +; CHECK-SD-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; CHECK-SD-NEXT: str x8, [x19, #72] +; CHECK-SD-NEXT: ldr x8, [sp, #72] // 8-byte Folded Reload +; CHECK-SD-NEXT: str x8, [x19, #64] +; CHECK-SD-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload +; CHECK-SD-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x26, x25, [sp, #128] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x28, x27, [sp, #112] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload +; CHECK-SD-NEXT: add sp, sp, #192 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i128: +; CHECK-GI-CVT: // %bb.0: +; CHECK-GI-CVT-NEXT: mov h1, v0.h[1] +; CHECK-GI-CVT-NEXT: mov h2, v0.h[2] +; CHECK-GI-CVT-NEXT: mov h3, v0.h[3] +; CHECK-GI-CVT-NEXT: mov h4, v0.h[4] +; CHECK-GI-CVT-NEXT: fcvt s5, h0 +; CHECK-GI-CVT-NEXT: mov h6, v0.h[5] +; CHECK-GI-CVT-NEXT: mov h7, v0.h[6] +; CHECK-GI-CVT-NEXT: mov h0, v0.h[7] +; CHECK-GI-CVT-NEXT: fcvt s1, h1 +; CHECK-GI-CVT-NEXT: fcvt s2, h2 +; CHECK-GI-CVT-NEXT: fcvt s3, h3 +; CHECK-GI-CVT-NEXT: fcvtzs x9, s5 +; CHECK-GI-CVT-NEXT: fcvt s4, h4 +; CHECK-GI-CVT-NEXT: fcvt s5, h6 +; CHECK-GI-CVT-NEXT: 
fcvt s0, h0 +; CHECK-GI-CVT-NEXT: fcvtzs x10, s1 +; CHECK-GI-CVT-NEXT: fcvt s1, h7 +; CHECK-GI-CVT-NEXT: fcvtzs x11, s2 +; CHECK-GI-CVT-NEXT: fcvtzs x12, s3 +; CHECK-GI-CVT-NEXT: mov v2.d[0], x9 +; CHECK-GI-CVT-NEXT: fcvtzs x9, s4 +; CHECK-GI-CVT-NEXT: mov v3.d[0], x10 +; CHECK-GI-CVT-NEXT: fcvtzs x10, s5 +; CHECK-GI-CVT-NEXT: mov v4.d[0], x11 +; CHECK-GI-CVT-NEXT: fcvtzs x11, s1 +; CHECK-GI-CVT-NEXT: mov v1.d[0], x12 +; CHECK-GI-CVT-NEXT: fcvtzs x12, s0 +; CHECK-GI-CVT-NEXT: mov v0.d[0], x9 +; CHECK-GI-CVT-NEXT: mov v2.d[1], xzr +; CHECK-GI-CVT-NEXT: mov v5.d[0], x10 +; CHECK-GI-CVT-NEXT: mov v3.d[1], xzr +; CHECK-GI-CVT-NEXT: mov v4.d[1], xzr +; CHECK-GI-CVT-NEXT: mov v6.d[0], x11 +; CHECK-GI-CVT-NEXT: mov v7.d[0], x12 +; CHECK-GI-CVT-NEXT: mov v1.d[1], xzr +; CHECK-GI-CVT-NEXT: mov v0.d[1], xzr +; CHECK-GI-CVT-NEXT: mov v5.d[1], xzr +; CHECK-GI-CVT-NEXT: stp q2, q3, [x8] +; CHECK-GI-CVT-NEXT: mov v6.d[1], xzr +; CHECK-GI-CVT-NEXT: mov v7.d[1], xzr +; CHECK-GI-CVT-NEXT: stp q4, q1, [x8, #32] +; CHECK-GI-CVT-NEXT: stp q0, q5, [x8, #64] +; CHECK-GI-CVT-NEXT: stp q6, q7, [x8, #96] +; CHECK-GI-CVT-NEXT: ret +; +; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i128: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: fcvtzs x9, h0 +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: fcvtzs x10, h1 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[6] +; CHECK-GI-FP16-NEXT: fcvtzs x11, h2 +; CHECK-GI-FP16-NEXT: mov h0, v0.h[7] +; CHECK-GI-FP16-NEXT: fcvtzs x12, h3 +; CHECK-GI-FP16-NEXT: mov v2.d[0], x9 +; CHECK-GI-FP16-NEXT: fcvtzs x9, h4 +; CHECK-GI-FP16-NEXT: mov v3.d[0], x10 +; CHECK-GI-FP16-NEXT: fcvtzs x10, h5 +; CHECK-GI-FP16-NEXT: mov v4.d[0], x11 +; CHECK-GI-FP16-NEXT: fcvtzs x11, h1 +; CHECK-GI-FP16-NEXT: mov v1.d[0], x12 +; CHECK-GI-FP16-NEXT: fcvtzs x12, h0 +; CHECK-GI-FP16-NEXT: mov v0.d[0], x9 +; CHECK-GI-FP16-NEXT: 
mov v2.d[1], xzr +; CHECK-GI-FP16-NEXT: mov v5.d[0], x10 +; CHECK-GI-FP16-NEXT: mov v3.d[1], xzr +; CHECK-GI-FP16-NEXT: mov v4.d[1], xzr +; CHECK-GI-FP16-NEXT: mov v6.d[0], x11 +; CHECK-GI-FP16-NEXT: mov v7.d[0], x12 +; CHECK-GI-FP16-NEXT: mov v1.d[1], xzr +; CHECK-GI-FP16-NEXT: mov v0.d[1], xzr +; CHECK-GI-FP16-NEXT: mov v5.d[1], xzr +; CHECK-GI-FP16-NEXT: stp q2, q3, [x8] +; CHECK-GI-FP16-NEXT: mov v6.d[1], xzr +; CHECK-GI-FP16-NEXT: mov v7.d[1], xzr +; CHECK-GI-FP16-NEXT: stp q4, q1, [x8, #32] +; CHECK-GI-FP16-NEXT: stp q0, q5, [x8, #64] +; CHECK-GI-FP16-NEXT: stp q6, q7, [x8, #96] +; CHECK-GI-FP16-NEXT: ret %x = call <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half> %f) ret <8 x i128> %x } diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index f63fba9dab6c631..046ec0d07902963 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -8,7 +8,6 @@ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_unsigned_v4f16_v4i50 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i19 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i50 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_unsigned_v8f16_v8i128 ; ; Float to unsigned 32-bit -- Vector size variation @@ -3729,147 +3728,233 @@ define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { } define <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { -; CHECK-LABEL: test_unsigned_v8f16_v8i128: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #176 -; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill -; 
CHECK-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 176 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w23, -40 -; CHECK-NEXT: .cfi_offset w24, -48 -; CHECK-NEXT: .cfi_offset w25, -56 -; CHECK-NEXT: .cfi_offset w26, -64 -; CHECK-NEXT: .cfi_offset w27, -72 -; CHECK-NEXT: .cfi_offset w28, -80 -; CHECK-NEXT: .cfi_offset w30, -88 -; CHECK-NEXT: .cfi_offset w29, -96 -; CHECK-NEXT: .cfi_offset b8, -104 -; CHECK-NEXT: .cfi_offset b9, -112 -; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov x19, x8 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff -; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: csel x9, xzr, x1, lt -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x10, x8, xzr, le -; CHECK-NEXT: csinv x8, x9, xzr, le -; CHECK-NEXT: stp x8, x10, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: stp x8, x9, [sp] // 16-byte Folded Spill -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, 
v0.h[3] -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x24, x9, xzr, le -; CHECK-NEXT: csinv x25, x8, xzr, le -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x26, x9, xzr, le -; CHECK-NEXT: csinv x27, x8, xzr, le -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x28, x9, xzr, le -; CHECK-NEXT: csinv x29, x8, xzr, le -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x20, x9, xzr, le -; CHECK-NEXT: csinv x21, x8, xzr, le -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x22, x9, xzr, le -; CHECK-NEXT: csinv x23, x8, xzr, le -; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: stp x22, x23, [x19, #32] -; CHECK-NEXT: stp x20, x21, [x19, #16] -; CHECK-NEXT: stp x28, x29, [x19] -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 -; 
CHECK-NEXT: stp x26, x27, [x19, #112] -; CHECK-NEXT: stp x24, x25, [x19, #96] -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: stp x9, x8, [x19, #48] -; CHECK-NEXT: ldr x8, [sp] // 8-byte Folded Reload -; CHECK-NEXT: str x8, [x19, #88] -; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: str x8, [x19, #80] -; CHECK-NEXT: ldr x8, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: str x8, [x19, #72] -; CHECK-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload -; CHECK-NEXT: str x8, [x19, #64] -; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload -; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload -; CHECK-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload -; CHECK-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload -; CHECK-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #176 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_unsigned_v8f16_v8i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub sp, sp, #176 +; CHECK-SD-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x29, x30, [sp, #80] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 176 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 +; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w23, -40 +; CHECK-SD-NEXT: .cfi_offset w24, -48 +; CHECK-SD-NEXT: .cfi_offset w25, -56 +; CHECK-SD-NEXT: .cfi_offset w26, -64 +; CHECK-SD-NEXT: 
.cfi_offset w27, -72 +; CHECK-SD-NEXT: .cfi_offset w28, -80 +; CHECK-SD-NEXT: .cfi_offset w30, -88 +; CHECK-SD-NEXT: .cfi_offset w29, -96 +; CHECK-SD-NEXT: .cfi_offset b8, -104 +; CHECK-SD-NEXT: .cfi_offset b9, -112 +; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: mov x19, x8 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff +; CHECK-SD-NEXT: fcmp s8, #0.0 +; CHECK-SD-NEXT: fmov s9, w8 +; CHECK-SD-NEXT: mov h0, v0.h[1] +; CHECK-SD-NEXT: csel x9, xzr, x1, lt +; CHECK-SD-NEXT: csel x8, xzr, x0, lt +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csinv x10, x8, xzr, le +; CHECK-SD-NEXT: csinv x8, x9, xzr, le +; CHECK-SD-NEXT: stp x8, x10, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: fcmp s8, #0.0 +; CHECK-SD-NEXT: mov h0, v0.h[2] +; CHECK-SD-NEXT: csel x8, xzr, x1, lt +; CHECK-SD-NEXT: csel x9, xzr, x0, lt +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csinv x9, x9, xzr, le +; CHECK-SD-NEXT: csinv x8, x8, xzr, le +; CHECK-SD-NEXT: stp x8, x9, [sp] // 16-byte Folded Spill +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: fcmp s8, #0.0 +; CHECK-SD-NEXT: mov h0, v0.h[3] +; CHECK-SD-NEXT: csel x8, xzr, x1, lt +; CHECK-SD-NEXT: csel x9, xzr, x0, lt +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csinv x24, x9, xzr, le +; CHECK-SD-NEXT: csinv x25, x8, xzr, le +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: fcmp s8, #0.0 +; CHECK-SD-NEXT: ldr 
q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: csel x8, xzr, x1, lt +; CHECK-SD-NEXT: csel x9, xzr, x0, lt +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csinv x26, x9, xzr, le +; CHECK-SD-NEXT: csinv x27, x8, xzr, le +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: fcmp s8, #0.0 +; CHECK-SD-NEXT: mov h0, v0.h[1] +; CHECK-SD-NEXT: csel x8, xzr, x1, lt +; CHECK-SD-NEXT: csel x9, xzr, x0, lt +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csinv x28, x9, xzr, le +; CHECK-SD-NEXT: csinv x29, x8, xzr, le +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: fcmp s8, #0.0 +; CHECK-SD-NEXT: mov h0, v0.h[2] +; CHECK-SD-NEXT: csel x8, xzr, x1, lt +; CHECK-SD-NEXT: csel x9, xzr, x0, lt +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csinv x20, x9, xzr, le +; CHECK-SD-NEXT: csinv x21, x8, xzr, le +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: fcmp s8, #0.0 +; CHECK-SD-NEXT: mov h0, v0.h[3] +; CHECK-SD-NEXT: csel x8, xzr, x1, lt +; CHECK-SD-NEXT: csel x9, xzr, x0, lt +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: fcvt s8, h0 +; CHECK-SD-NEXT: csinv x22, x9, xzr, le +; CHECK-SD-NEXT: csinv x23, x8, xzr, le +; CHECK-SD-NEXT: fmov s0, s8 +; CHECK-SD-NEXT: bl __fixunssfti +; CHECK-SD-NEXT: fcmp s8, #0.0 +; CHECK-SD-NEXT: stp x22, x23, [x19, #32] +; CHECK-SD-NEXT: stp x20, x21, [x19, #16] +; CHECK-SD-NEXT: stp x28, x29, [x19] +; CHECK-SD-NEXT: csel x8, xzr, x1, lt +; CHECK-SD-NEXT: csel x9, xzr, x0, lt +; CHECK-SD-NEXT: fcmp s8, s9 +; CHECK-SD-NEXT: stp x26, x27, [x19, #112] +; CHECK-SD-NEXT: stp x24, x25, [x19, #96] +; CHECK-SD-NEXT: csinv x8, x8, xzr, le +; CHECK-SD-NEXT: csinv x9, x9, xzr, le +; 
CHECK-SD-NEXT: stp x9, x8, [x19, #48] +; CHECK-SD-NEXT: ldr x8, [sp] // 8-byte Folded Reload +; CHECK-SD-NEXT: str x8, [x19, #88] +; CHECK-SD-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload +; CHECK-SD-NEXT: str x8, [x19, #80] +; CHECK-SD-NEXT: ldr x8, [sp, #16] // 8-byte Folded Reload +; CHECK-SD-NEXT: str x8, [x19, #72] +; CHECK-SD-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload +; CHECK-SD-NEXT: str x8, [x19, #64] +; CHECK-SD-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: add sp, sp, #176 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i128: +; CHECK-GI-CVT: // %bb.0: +; CHECK-GI-CVT-NEXT: mov h1, v0.h[1] +; CHECK-GI-CVT-NEXT: mov h2, v0.h[2] +; CHECK-GI-CVT-NEXT: mov h3, v0.h[3] +; CHECK-GI-CVT-NEXT: mov h4, v0.h[4] +; CHECK-GI-CVT-NEXT: fcvt s5, h0 +; CHECK-GI-CVT-NEXT: mov h6, v0.h[5] +; CHECK-GI-CVT-NEXT: mov h7, v0.h[6] +; CHECK-GI-CVT-NEXT: mov h0, v0.h[7] +; CHECK-GI-CVT-NEXT: fcvt s1, h1 +; CHECK-GI-CVT-NEXT: fcvt s2, h2 +; CHECK-GI-CVT-NEXT: fcvt s3, h3 +; CHECK-GI-CVT-NEXT: fcvtzu x9, s5 +; CHECK-GI-CVT-NEXT: fcvt s4, h4 +; CHECK-GI-CVT-NEXT: fcvt s5, h6 +; CHECK-GI-CVT-NEXT: fcvt s0, h0 +; CHECK-GI-CVT-NEXT: fcvtzu x10, s1 +; CHECK-GI-CVT-NEXT: fcvt s1, h7 +; CHECK-GI-CVT-NEXT: fcvtzu x11, s2 +; CHECK-GI-CVT-NEXT: fcvtzu x12, s3 +; CHECK-GI-CVT-NEXT: mov v2.d[0], x9 +; CHECK-GI-CVT-NEXT: fcvtzu x9, s4 +; CHECK-GI-CVT-NEXT: mov v3.d[0], x10 +; CHECK-GI-CVT-NEXT: fcvtzu x10, s5 +; CHECK-GI-CVT-NEXT: mov v4.d[0], x11 +; CHECK-GI-CVT-NEXT: fcvtzu x11, s1 +; CHECK-GI-CVT-NEXT: mov v1.d[0], x12 +; 
CHECK-GI-CVT-NEXT: fcvtzu x12, s0 +; CHECK-GI-CVT-NEXT: mov v0.d[0], x9 +; CHECK-GI-CVT-NEXT: mov v2.d[1], xzr +; CHECK-GI-CVT-NEXT: mov v5.d[0], x10 +; CHECK-GI-CVT-NEXT: mov v3.d[1], xzr +; CHECK-GI-CVT-NEXT: mov v4.d[1], xzr +; CHECK-GI-CVT-NEXT: mov v6.d[0], x11 +; CHECK-GI-CVT-NEXT: mov v7.d[0], x12 +; CHECK-GI-CVT-NEXT: mov v1.d[1], xzr +; CHECK-GI-CVT-NEXT: mov v0.d[1], xzr +; CHECK-GI-CVT-NEXT: mov v5.d[1], xzr +; CHECK-GI-CVT-NEXT: stp q2, q3, [x8] +; CHECK-GI-CVT-NEXT: mov v6.d[1], xzr +; CHECK-GI-CVT-NEXT: mov v7.d[1], xzr +; CHECK-GI-CVT-NEXT: stp q4, q1, [x8, #32] +; CHECK-GI-CVT-NEXT: stp q0, q5, [x8, #64] +; CHECK-GI-CVT-NEXT: stp q6, q7, [x8, #96] +; CHECK-GI-CVT-NEXT: ret +; +; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i128: +; CHECK-GI-FP16: // %bb.0: +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: fcvtzu x9, h0 +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: fcvtzu x10, h1 +; CHECK-GI-FP16-NEXT: mov h1, v0.h[6] +; CHECK-GI-FP16-NEXT: fcvtzu x11, h2 +; CHECK-GI-FP16-NEXT: mov h0, v0.h[7] +; CHECK-GI-FP16-NEXT: fcvtzu x12, h3 +; CHECK-GI-FP16-NEXT: mov v2.d[0], x9 +; CHECK-GI-FP16-NEXT: fcvtzu x9, h4 +; CHECK-GI-FP16-NEXT: mov v3.d[0], x10 +; CHECK-GI-FP16-NEXT: fcvtzu x10, h5 +; CHECK-GI-FP16-NEXT: mov v4.d[0], x11 +; CHECK-GI-FP16-NEXT: fcvtzu x11, h1 +; CHECK-GI-FP16-NEXT: mov v1.d[0], x12 +; CHECK-GI-FP16-NEXT: fcvtzu x12, h0 +; CHECK-GI-FP16-NEXT: mov v0.d[0], x9 +; CHECK-GI-FP16-NEXT: mov v2.d[1], xzr +; CHECK-GI-FP16-NEXT: mov v5.d[0], x10 +; CHECK-GI-FP16-NEXT: mov v3.d[1], xzr +; CHECK-GI-FP16-NEXT: mov v4.d[1], xzr +; CHECK-GI-FP16-NEXT: mov v6.d[0], x11 +; CHECK-GI-FP16-NEXT: mov v7.d[0], x12 +; CHECK-GI-FP16-NEXT: mov v1.d[1], xzr +; CHECK-GI-FP16-NEXT: mov v0.d[1], xzr +; CHECK-GI-FP16-NEXT: mov v5.d[1], xzr +; CHECK-GI-FP16-NEXT: stp q2, q3, [x8] +; CHECK-GI-FP16-NEXT: mov v6.d[1], xzr +; 
CHECK-GI-FP16-NEXT: mov v7.d[1], xzr +; CHECK-GI-FP16-NEXT: stp q4, q1, [x8, #32] +; CHECK-GI-FP16-NEXT: stp q0, q5, [x8, #64] +; CHECK-GI-FP16-NEXT: stp q6, q7, [x8, #96] +; CHECK-GI-FP16-NEXT: ret %x = call <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half> %f) ret <8 x i128> %x } diff --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll index a4d1c53c272aa1e..167e9d1c1964352 100644 --- a/llvm/test/CodeGen/AArch64/load.ll +++ b/llvm/test/CodeGen/AArch64/load.ll @@ -315,3 +315,34 @@ define <3 x i32> @load_v3i32(ptr %ptr){ %a = load <3 x i32>, ptr %ptr ret <3 x i32> %a } + +define <2 x i128> @load_v2i128(ptr %p) { +; CHECK-SD-LABEL: load_v2i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp x8, x1, [x0] +; CHECK-SD-NEXT: ldp x2, x3, [x0, #16] +; CHECK-SD-NEXT: mov x0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldp q0, q1, [x0] +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: mov d3, v1.d[1] +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: fmov x2, d1 +; CHECK-GI-NEXT: fmov x1, d2 +; CHECK-GI-NEXT: fmov x3, d3 +; CHECK-GI-NEXT: ret + %a = load <2 x i128>, ptr %p + ret <2 x i128> %a +} + +define <2 x fp128> @load_v2f128(ptr %p) { +; CHECK-LABEL: load_v2f128: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ret + %a = load <2 x fp128>, ptr %p + ret <2 x fp128> %a +} diff --git a/llvm/test/CodeGen/AArch64/store.ll b/llvm/test/CodeGen/AArch64/store.ll index bf22d79a4df994b..86d74b69f4958f1 100644 --- a/llvm/test/CodeGen/AArch64/store.ll +++ b/llvm/test/CodeGen/AArch64/store.ll @@ -340,3 +340,31 @@ define void @store_v3i32(<3 x i32> %a, ptr %ptr){ store <3 x i32> %a, ptr %ptr ret void } + +define void @store_v2i128(<2 x i128> %a, ptr %p) { +; CHECK-SD-LABEL: store_v2i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: stp x2, x3, [x4, #16] +; CHECK-SD-NEXT: stp x0, x1, [x4] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: store_v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov 
v0.d[0], x0 +; CHECK-GI-NEXT: mov v1.d[0], x2 +; CHECK-GI-NEXT: mov v0.d[1], x1 +; CHECK-GI-NEXT: mov v1.d[1], x3 +; CHECK-GI-NEXT: stp q0, q1, [x4] +; CHECK-GI-NEXT: ret + store <2 x i128> %a, ptr %p + ret void +} + +define void @store_v2f128(<2 x fp128> %a, ptr %p) { +; CHECK-LABEL: store_v2f128: +; CHECK: // %bb.0: +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret + store <2 x fp128> %a, ptr %p + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll b/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll new file mode 100644 index 000000000000000..e5d4e1e9bc7da66 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll @@ -0,0 +1,201 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mattr=+sve2 < %s | FileCheck %s --check-prefixes=NOBF16 +; RUN: llc -mattr=+sve2 --enable-no-nans-fp-math < %s | FileCheck %s --check-prefixes=NOBF16NNAN +; RUN: llc -mattr=+sve2,+bf16 < %s | FileCheck %s --check-prefixes=BF16 +; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=BF16 + +target triple = "aarch64-unknown-linux-gnu" + +define @fptrunc_nxv2f64_to_nxv2bf16( %a) { +; NOBF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16: +; NOBF16: // %bb.0: +; NOBF16-NEXT: ptrue p0.d +; NOBF16-NEXT: mov z1.s, #32767 // =0x7fff +; NOBF16-NEXT: fcvtx z0.s, p0/m, z0.d +; NOBF16-NEXT: lsr z2.s, z0.s, #16 +; NOBF16-NEXT: add z1.s, z0.s, z1.s +; NOBF16-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; NOBF16-NEXT: orr z0.s, z0.s, #0x400000 +; NOBF16-NEXT: and z2.s, z2.s, #0x1 +; NOBF16-NEXT: add z1.s, z2.s, z1.s +; NOBF16-NEXT: sel z0.s, p0, z0.s, z1.s +; NOBF16-NEXT: lsr z0.s, z0.s, #16 +; NOBF16-NEXT: ret +; +; NOBF16NNAN-LABEL: fptrunc_nxv2f64_to_nxv2bf16: +; NOBF16NNAN: // %bb.0: +; NOBF16NNAN-NEXT: ptrue p0.d +; NOBF16NNAN-NEXT: mov z1.s, #32767 // =0x7fff +; NOBF16NNAN-NEXT: fcvtx z0.s, p0/m, z0.d +; NOBF16NNAN-NEXT: lsr z2.s, z0.s, #16 +; NOBF16NNAN-NEXT: add z0.s, z0.s, z1.s +; NOBF16NNAN-NEXT: 
and z2.s, z2.s, #0x1 +; NOBF16NNAN-NEXT: add z0.s, z2.s, z0.s +; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16 +; NOBF16NNAN-NEXT: ret +; +; BF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16: +; BF16: // %bb.0: +; BF16-NEXT: ptrue p0.d +; BF16-NEXT: fcvtx z0.s, p0/m, z0.d +; BF16-NEXT: bfcvt z0.h, p0/m, z0.s +; BF16-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fptrunc_nxv4f64_to_nxv4bf16( %a) { +; NOBF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16: +; NOBF16: // %bb.0: +; NOBF16-NEXT: ptrue p0.d +; NOBF16-NEXT: mov z2.s, #32767 // =0x7fff +; NOBF16-NEXT: fcvtx z1.s, p0/m, z1.d +; NOBF16-NEXT: fcvtx z0.s, p0/m, z0.d +; NOBF16-NEXT: lsr z3.s, z1.s, #16 +; NOBF16-NEXT: lsr z4.s, z0.s, #16 +; NOBF16-NEXT: add z5.s, z1.s, z2.s +; NOBF16-NEXT: add z2.s, z0.s, z2.s +; NOBF16-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s +; NOBF16-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; NOBF16-NEXT: orr z1.s, z1.s, #0x400000 +; NOBF16-NEXT: orr z0.s, z0.s, #0x400000 +; NOBF16-NEXT: and z3.s, z3.s, #0x1 +; NOBF16-NEXT: and z4.s, z4.s, #0x1 +; NOBF16-NEXT: add z3.s, z3.s, z5.s +; NOBF16-NEXT: add z2.s, z4.s, z2.s +; NOBF16-NEXT: sel z1.s, p1, z1.s, z3.s +; NOBF16-NEXT: sel z0.s, p0, z0.s, z2.s +; NOBF16-NEXT: lsr z1.s, z1.s, #16 +; NOBF16-NEXT: lsr z0.s, z0.s, #16 +; NOBF16-NEXT: uzp1 z0.s, z0.s, z1.s +; NOBF16-NEXT: ret +; +; NOBF16NNAN-LABEL: fptrunc_nxv4f64_to_nxv4bf16: +; NOBF16NNAN: // %bb.0: +; NOBF16NNAN-NEXT: ptrue p0.d +; NOBF16NNAN-NEXT: mov z2.s, #32767 // =0x7fff +; NOBF16NNAN-NEXT: fcvtx z1.s, p0/m, z1.d +; NOBF16NNAN-NEXT: fcvtx z0.s, p0/m, z0.d +; NOBF16NNAN-NEXT: lsr z3.s, z1.s, #16 +; NOBF16NNAN-NEXT: lsr z4.s, z0.s, #16 +; NOBF16NNAN-NEXT: add z1.s, z1.s, z2.s +; NOBF16NNAN-NEXT: add z0.s, z0.s, z2.s +; NOBF16NNAN-NEXT: and z3.s, z3.s, #0x1 +; NOBF16NNAN-NEXT: and z4.s, z4.s, #0x1 +; NOBF16NNAN-NEXT: add z1.s, z3.s, z1.s +; NOBF16NNAN-NEXT: add z0.s, z4.s, z0.s +; NOBF16NNAN-NEXT: lsr z1.s, z1.s, #16 +; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16 +; NOBF16NNAN-NEXT: uzp1 z0.s, z0.s, z1.s +; NOBF16NNAN-NEXT: 
ret +; +; BF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16: +; BF16: // %bb.0: +; BF16-NEXT: ptrue p0.d +; BF16-NEXT: fcvtx z1.s, p0/m, z1.d +; BF16-NEXT: fcvtx z0.s, p0/m, z0.d +; BF16-NEXT: bfcvt z1.h, p0/m, z1.s +; BF16-NEXT: bfcvt z0.h, p0/m, z0.s +; BF16-NEXT: uzp1 z0.s, z0.s, z1.s +; BF16-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fptrunc_nxv8f64_to_nxv8bf16( %a) { +; NOBF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16: +; NOBF16: // %bb.0: +; NOBF16-NEXT: ptrue p0.d +; NOBF16-NEXT: mov z4.s, #32767 // =0x7fff +; NOBF16-NEXT: fcvtx z3.s, p0/m, z3.d +; NOBF16-NEXT: fcvtx z2.s, p0/m, z2.d +; NOBF16-NEXT: fcvtx z1.s, p0/m, z1.d +; NOBF16-NEXT: fcvtx z0.s, p0/m, z0.d +; NOBF16-NEXT: lsr z5.s, z3.s, #16 +; NOBF16-NEXT: lsr z6.s, z2.s, #16 +; NOBF16-NEXT: lsr z7.s, z1.s, #16 +; NOBF16-NEXT: lsr z24.s, z0.s, #16 +; NOBF16-NEXT: add z25.s, z3.s, z4.s +; NOBF16-NEXT: add z26.s, z2.s, z4.s +; NOBF16-NEXT: add z27.s, z1.s, z4.s +; NOBF16-NEXT: add z4.s, z0.s, z4.s +; NOBF16-NEXT: fcmuo p1.s, p0/z, z3.s, z3.s +; NOBF16-NEXT: and z5.s, z5.s, #0x1 +; NOBF16-NEXT: and z6.s, z6.s, #0x1 +; NOBF16-NEXT: and z7.s, z7.s, #0x1 +; NOBF16-NEXT: and z24.s, z24.s, #0x1 +; NOBF16-NEXT: fcmuo p2.s, p0/z, z2.s, z2.s +; NOBF16-NEXT: fcmuo p3.s, p0/z, z1.s, z1.s +; NOBF16-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; NOBF16-NEXT: orr z3.s, z3.s, #0x400000 +; NOBF16-NEXT: orr z2.s, z2.s, #0x400000 +; NOBF16-NEXT: add z5.s, z5.s, z25.s +; NOBF16-NEXT: add z6.s, z6.s, z26.s +; NOBF16-NEXT: add z7.s, z7.s, z27.s +; NOBF16-NEXT: add z4.s, z24.s, z4.s +; NOBF16-NEXT: orr z1.s, z1.s, #0x400000 +; NOBF16-NEXT: orr z0.s, z0.s, #0x400000 +; NOBF16-NEXT: sel z3.s, p1, z3.s, z5.s +; NOBF16-NEXT: sel z2.s, p2, z2.s, z6.s +; NOBF16-NEXT: sel z1.s, p3, z1.s, z7.s +; NOBF16-NEXT: sel z0.s, p0, z0.s, z4.s +; NOBF16-NEXT: lsr z3.s, z3.s, #16 +; NOBF16-NEXT: lsr z2.s, z2.s, #16 +; NOBF16-NEXT: lsr z1.s, z1.s, #16 +; NOBF16-NEXT: lsr z0.s, z0.s, #16 +; NOBF16-NEXT: uzp1 z2.s, z2.s, z3.s +; NOBF16-NEXT: uzp1 z0.s, z0.s, 
z1.s +; NOBF16-NEXT: uzp1 z0.h, z0.h, z2.h +; NOBF16-NEXT: ret +; +; NOBF16NNAN-LABEL: fptrunc_nxv8f64_to_nxv8bf16: +; NOBF16NNAN: // %bb.0: +; NOBF16NNAN-NEXT: ptrue p0.d +; NOBF16NNAN-NEXT: mov z4.s, #32767 // =0x7fff +; NOBF16NNAN-NEXT: fcvtx z3.s, p0/m, z3.d +; NOBF16NNAN-NEXT: fcvtx z2.s, p0/m, z2.d +; NOBF16NNAN-NEXT: fcvtx z1.s, p0/m, z1.d +; NOBF16NNAN-NEXT: fcvtx z0.s, p0/m, z0.d +; NOBF16NNAN-NEXT: lsr z5.s, z3.s, #16 +; NOBF16NNAN-NEXT: lsr z6.s, z2.s, #16 +; NOBF16NNAN-NEXT: lsr z7.s, z1.s, #16 +; NOBF16NNAN-NEXT: lsr z24.s, z0.s, #16 +; NOBF16NNAN-NEXT: add z3.s, z3.s, z4.s +; NOBF16NNAN-NEXT: add z2.s, z2.s, z4.s +; NOBF16NNAN-NEXT: add z1.s, z1.s, z4.s +; NOBF16NNAN-NEXT: add z0.s, z0.s, z4.s +; NOBF16NNAN-NEXT: and z5.s, z5.s, #0x1 +; NOBF16NNAN-NEXT: and z6.s, z6.s, #0x1 +; NOBF16NNAN-NEXT: and z7.s, z7.s, #0x1 +; NOBF16NNAN-NEXT: and z24.s, z24.s, #0x1 +; NOBF16NNAN-NEXT: add z3.s, z5.s, z3.s +; NOBF16NNAN-NEXT: add z2.s, z6.s, z2.s +; NOBF16NNAN-NEXT: add z1.s, z7.s, z1.s +; NOBF16NNAN-NEXT: add z0.s, z24.s, z0.s +; NOBF16NNAN-NEXT: lsr z3.s, z3.s, #16 +; NOBF16NNAN-NEXT: lsr z2.s, z2.s, #16 +; NOBF16NNAN-NEXT: lsr z1.s, z1.s, #16 +; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16 +; NOBF16NNAN-NEXT: uzp1 z2.s, z2.s, z3.s +; NOBF16NNAN-NEXT: uzp1 z0.s, z0.s, z1.s +; NOBF16NNAN-NEXT: uzp1 z0.h, z0.h, z2.h +; NOBF16NNAN-NEXT: ret +; +; BF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16: +; BF16: // %bb.0: +; BF16-NEXT: ptrue p0.d +; BF16-NEXT: fcvtx z3.s, p0/m, z3.d +; BF16-NEXT: fcvtx z2.s, p0/m, z2.d +; BF16-NEXT: fcvtx z1.s, p0/m, z1.d +; BF16-NEXT: fcvtx z0.s, p0/m, z0.d +; BF16-NEXT: bfcvt z3.h, p0/m, z3.s +; BF16-NEXT: bfcvt z2.h, p0/m, z2.s +; BF16-NEXT: bfcvt z1.h, p0/m, z1.s +; BF16-NEXT: bfcvt z0.h, p0/m, z0.s +; BF16-NEXT: uzp1 z2.s, z2.s, z3.s +; BF16-NEXT: uzp1 z0.s, z0.s, z1.s +; BF16-NEXT: uzp1 z0.h, z0.h, z2.h +; BF16-NEXT: ret + %res = fptrunc %a to + ret %res +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll index e9e7360733581a2..c9426106af5dadd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -68,6 +68,9 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload @@ -84,10 +87,7 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; CHECK-NEXT: s_or_saveexec_b32 s21, -1 -; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b32 exec_lo, s21 -; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_readfirstlane_b32 s12, v7 ; CHECK-NEXT: v_readfirstlane_b32 s10, v6 ; CHECK-NEXT: v_readfirstlane_b32 s9, v5 @@ -104,7 +104,6 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: s_mov_b32 s17, s6 ; CHECK-NEXT: s_mov_b32 s18, s5 ; CHECK-NEXT: s_mov_b32 s19, s4 -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_writelane_b32 v16, s12, 5 ; CHECK-NEXT: v_writelane_b32 v16, s13, 6 ; CHECK-NEXT: v_writelane_b32 v16, s14, 7 @@ -138,8 +137,6 @@ define <4 x float> @waterfall_loop(<8 x i32> 
%vgpr_srd) { ; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; CHECK-NEXT: s_or_saveexec_b32 s21, -1 ; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s21 @@ -157,6 +154,9 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: v_readlane_b32 s17, v16, 1 ; CHECK-NEXT: v_readlane_b32 s18, v16, 2 ; CHECK-NEXT: v_readlane_b32 s19, v16, 3 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill diff --git a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll index 2b98f61748066f8..9988b2fa1eaf081 100644 --- a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll @@ -29,11 +29,11 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) { ; REGALLOC-NEXT: bb.1.Flow: ; REGALLOC-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; REGALLOC-NEXT: {{ $}} - ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) ; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0, implicit-def $sgpr4_sgpr5 ; REGALLOC-NEXT: 
$sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 1 ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr63, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 @@ -62,11 +62,11 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) { ; REGALLOC-NEXT: S_BRANCH %bb.1 ; REGALLOC-NEXT: {{ $}} ; REGALLOC-NEXT: bb.4.bb.3: - ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5) ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) ; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 2, implicit-def $sgpr4_sgpr5 ; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 3 ; REGALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc + ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5) ; REGALLOC-NEXT: renamable $vgpr0 = V_LSHL_ADD_U32_e64 killed $vgpr0, 2, $vgpr0, implicit $exec ; REGALLOC-NEXT: SI_RETURN implicit killed $vgpr0 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index 7cec15ea5be87a0..75d0b83a024ff5c 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -67,7 +67,6 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) { ; 
GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB0_4 ; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload @@ -75,12 +74,14 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_readlane_b32 s4, v4, 0 ; GCN-O0-NEXT: v_readlane_b32 s5, v4, 1 +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s2, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s0, 0 ; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 ; GCN-O0-NEXT: s_mov_b32 s1, s2 ; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-O0-NEXT: s_mov_b64 s[6:7], s[0:1] +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v0 ; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 ; GCN-O0-NEXT: v_mov_b32_e32 v2, v3 @@ -99,8 +100,6 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB0_3 ; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then -; GCN-O0-NEXT: s_waitcnt expcnt(1) -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload @@ -108,7 +107,9 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0 ; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1 +; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_add_i32_e64 
v1, s[2:3], v1, v0 ; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1 ; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec @@ -235,7 +236,6 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB1_3 ; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload @@ -243,12 +243,14 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_readlane_b32 s4, v4, 0 ; GCN-O0-NEXT: v_readlane_b32 s5, v4, 1 +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s2, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s0, 0 ; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 ; GCN-O0-NEXT: s_mov_b32 s1, s2 ; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-O0-NEXT: s_mov_b64 s[6:7], s[0:1] +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v0 ; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 ; GCN-O0-NEXT: v_mov_b32_e32 v2, v3 @@ -267,8 +269,6 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB1_4 ; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then -; GCN-O0-NEXT: s_waitcnt expcnt(1) -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload @@ -276,7 +276,9 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: 
v_readlane_b32 s0, v4, 0 ; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1 +; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0 ; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1 ; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec @@ -302,8 +304,6 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] ; GCN-O0-NEXT: s_branch .LBB1_5 ; GCN-O0-NEXT: .LBB1_4: ; %bb.inner.end -; GCN-O0-NEXT: s_waitcnt expcnt(1) -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload @@ -314,7 +314,9 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a ; GCN-O0-NEXT: s_or_b64 exec, exec, s[2:3] ; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0 ; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1 +; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_mov_b32_e32 v0, 2 +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0 ; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1 ; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec @@ -454,18 +456,17 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB2_6 ; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 
offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 2 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[0:1], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[2:3], exec ; GCN-O0-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] ; GCN-O0-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_writelane_b32 v4, s2, 4 ; GCN-O0-NEXT: v_writelane_b32 v4, s3, 5 ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -492,7 +493,6 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB2_5 ; GCN-O0-NEXT: ; %bb.3: ; %bb.then -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload @@ -500,7 +500,9 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0 ; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1 +; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0 ; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1 ; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec @@ -516,7 +518,6 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 ; GCN-O0-NEXT: s_branch .LBB2_5 ; GCN-O0-NEXT: .LBB2_4: ; %bb.else -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload @@ -524,7 +525,9 @@ define amdgpu_kernel void 
@nested_if_if_else(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0 ; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1 +; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_mov_b32_e32 v0, 2 +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0 ; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1 ; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec @@ -721,13 +724,13 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB3_8 ; GCN-O0-NEXT: ; %bb.2: ; %bb.outer.then -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 ; GCN-O0-NEXT: s_mov_b32 s4, s2 @@ -737,12 +740,11 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] ; GCN-O0-NEXT: v_mov_b32_e32 v1, 1 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[0:3], 0 addr64 offset:4 ; GCN-O0-NEXT: s_mov_b32 s0, 2 ; GCN-O0-NEXT: 
v_cmp_eq_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec -; GCN-O0-NEXT: s_waitcnt vmcnt(1) ; GCN-O0-NEXT: v_writelane_b32 v6, s0, 4 ; GCN-O0-NEXT: v_writelane_b32 v6, s1, 5 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 @@ -768,13 +770,13 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:8 ; GCN-O0-NEXT: s_branch .LBB3_7 ; GCN-O0-NEXT: .LBB3_4: ; %bb.outer.else -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s1, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s0, 0 ; GCN-O0-NEXT: s_mov_b32 s2, s0 @@ -784,11 +786,10 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-O0-NEXT: s_mov_b64 s[6:7], s[2:3] ; GCN-O0-NEXT: v_mov_b32_e32 v1, 3 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[4:7], 0 addr64 offset:12 ; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec -; GCN-O0-NEXT: s_waitcnt vmcnt(1) ; GCN-O0-NEXT: v_writelane_b32 v6, s0, 6 ; GCN-O0-NEXT: v_writelane_b32 v6, s1, 7 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 @@ -926,7 +927,6 @@ define amdgpu_kernel void 
@s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB4_2 ; GCN-O0-NEXT: ; %bb.1: ; %bb.then -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 ; 4-byte Folded Reload @@ -934,12 +934,14 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_readlane_b32 s0, v3, 0 ; GCN-O0-NEXT: v_readlane_b32 s1, v3, 1 +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s2, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s4, 0 ; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 ; GCN-O0-NEXT: s_mov_b32 s5, s2 ; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_ashrrev_i32_e64 v2, 31, v0 ; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GCN-O0-NEXT: v_mov_b32_e32 v1, v2 @@ -1064,8 +1066,6 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: .LBB5_1: ; %bb1 ; GCN-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_waitcnt expcnt(1) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1077,7 +1077,9 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: v_readlane_b32 s7, v6, 1 ; GCN-O0-NEXT: v_writelane_b32 v6, s6, 4 ; GCN-O0-NEXT: v_writelane_b32 v6, s7, 5 +; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: 
s_mov_b32 s4, 0x207 +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_cmp_lt_i32_e64 s[4:5], v0, s4 ; GCN-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GCN-O0-NEXT: v_writelane_b32 v6, s4, 6 @@ -1094,7 +1096,6 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_cbranch_execnz .LBB5_1 ; GCN-O0-NEXT: ; %bb.2: ; %bb2 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1103,7 +1104,9 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: v_readlane_b32 s4, v6, 6 ; GCN-O0-NEXT: v_readlane_b32 s5, v6, 7 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s6, 0 +; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, s6 ; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v0, s6 ; GCN-O0-NEXT: v_writelane_b32 v6, s4, 8 @@ -1188,46 +1191,48 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_branch .LBB5_6 ; GCN-O0-NEXT: .LBB5_5: ; %Flow2 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 -; GCN-O0-NEXT: s_waitcnt expcnt(3) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt expcnt(2) -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt expcnt(1) -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 +; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: 
s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_readlane_b32 s4, v6, 10 ; GCN-O0-NEXT: v_readlane_b32 s5, v6, 11 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_branch .LBB5_7 ; GCN-O0-NEXT: .LBB5_6: ; %Flow ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 -; GCN-O0-NEXT: s_waitcnt expcnt(3) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt expcnt(2) -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt expcnt(1) -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 +; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_readlane_b32 s4, v6, 12 ; GCN-O0-NEXT: v_readlane_b32 s5, v6, 13 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; 
GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_branch .LBB5_5 ; GCN-O0-NEXT: .LBB5_7: ; %bb10 @@ -1266,14 +1271,6 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: .LBB5_9: ; %Flow3 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 -; GCN-O0-NEXT: s_waitcnt expcnt(4) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt expcnt(3) -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt expcnt(2) -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt expcnt(1) -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1286,6 +1283,10 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: v_readlane_b32 s7, v6, 5 ; GCN-O0-NEXT: v_readlane_b32 s4, v6, 14 ; GCN-O0-NEXT: v_readlane_b32 s5, v6, 15 +; GCN-O0-NEXT: buffer_load_dword v0, 
off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_and_b64 s[4:5], exec, s[4:5] ; GCN-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] ; GCN-O0-NEXT: s_mov_b64 s[6:7], 0 @@ -1300,9 +1301,13 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] ; GCN-O0-NEXT: s_cbranch_execnz .LBB5_1 diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index 7c09fec908f93e7..08644572372c362 100644 --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -46,9 +46,6 @@ ; VMEM: [[ENDIF]]: -; Restore val -; VGPR: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload - ; Reload and restore exec mask ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -61,7 +58,7 @@ ; GCN: s_or_b64 exec, exec, 
s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] ; Restore val -; VMEM: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload +; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]] @@ -123,7 +120,6 @@ endif: ; GCN: buffer_store_dword v[[VAL_LOOP_RELOAD]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: [[END]]: -; VGPR: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -134,7 +130,7 @@ endif: ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] -; VMEM: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload +; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]] @@ -194,7 +190,6 @@ end: ; GCN-NEXT: s_branch [[ELSE:.LBB[0-9]+_[0-9]+]] ; GCN: [[FLOW]]: ; %Flow -; VGPR: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload ; VGPR: buffer_load_dword [[SPILL_VGPR:v[0-9]+]], off, s[0:3], 0 ; 4-byte Folded Reload ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -206,7 +201,7 @@ end: ; GCN: s_or_saveexec_b64 s[[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC:[0-9]+]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC:[0-9]+]]], s[[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]] -; VMEM: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 
offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload +; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload ; Regular spill value restored after exec modification ; Followed by spill @@ -240,7 +235,6 @@ end: ; GCN-NEXT: s_branch [[FLOW]] ; GCN: [[ENDIF]]: -; VGPR: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_HI_LANE]] @@ -252,7 +246,7 @@ end: ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] -; VMEM: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload +; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]] define amdgpu_kernel void @divergent_if_else_endif(ptr addrspace(1) %out) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll index 6686742e449f5cf..d94ec56842ab870 100644 --- a/llvm/test/CodeGen/AMDGPU/div_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll @@ -581,10 +581,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_5 ; GFX9-O0-NEXT: .LBB0_3: ; %Flow2 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 ; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], 
s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] @@ -592,8 +588,13 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4 ; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -640,6 +641,13 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_3 ; GFX9-O0-NEXT: .LBB0_5: ; %Flow1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 8 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 9 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload @@ -648,15 +656,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:100 ; 4-byte Folded 
Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 8 -; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 9 -; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -670,6 +672,12 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_branch .LBB0_4 ; GFX9-O0-NEXT: .LBB0_6: ; %udiv-do-while ; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 10 +; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 11 ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload @@ -694,13 +702,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:276 ; 4-byte 
Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 10 -; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 11 ; GFX9-O0-NEXT: s_mov_b32 s4, 63 +; GFX9-O0-NEXT: s_waitcnt vmcnt(16) ; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29 ; GFX9-O0-NEXT: s_mov_b32 s5, 1 @@ -727,6 +730,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29 +; GFX9-O0-NEXT: s_waitcnt vmcnt(10) ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27 ; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 @@ -736,6 +740,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 +; GFX9-O0-NEXT: s_waitcnt vmcnt(8) ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25 ; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 @@ -747,10 +752,12 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15 ; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc @@ -890,6 +897,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_cbranch_execnz .LBB0_6 ; GFX9-O0-NEXT: s_branch .LBB0_1 ; GFX9-O0-NEXT: .LBB0_7: ; 
%udiv-preheader +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload @@ -906,12 +916,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: s_waitcnt vmcnt(10) +; GFX9-O0-NEXT: s_waitcnt vmcnt(9) ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 ; GFX9-O0-NEXT: s_mov_b32 s6, 64 @@ -992,7 +999,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(4) ; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 10 ; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 11 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 @@ -1024,6 +1030,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_6 ; GFX9-O0-NEXT: .LBB0_8: ; %udiv-bb1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; 
GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload @@ -1032,12 +1041,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1 ; GFX9-O0-NEXT: s_mov_b32 s5, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-O0-NEXT: s_mov_b32 s4, s7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 @@ -1048,7 +1054,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 ; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc @@ -1152,7 +1158,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec ; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] ; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-O0-NEXT: s_waitcnt vmcnt(16) ; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8 ; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 @@ -1710,10 +1715,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 
offset:112 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_5 ; GFX9-G-O0-NEXT: .LBB0_3: ; %Flow2 -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] @@ -1721,10 +1722,17 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 0 ; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 1 ; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_9 ; GFX9-G-O0-NEXT: .LBB0_4: ; %udiv-loop-exit @@ -1784,6 +1792,13 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill 
; GFX9-G-O0-NEXT: s_branch .LBB0_3 ; GFX9-G-O0-NEXT: .LBB0_5: ; %Flow1 +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 4 +; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 5 +; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload @@ -1792,17 +1807,13 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 4 -; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 5 -; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 
offset:196 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 @@ -1812,6 +1823,12 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: s_branch .LBB0_4 ; GFX9-G-O0-NEXT: .LBB0_6: ; %udiv-do-while ; GFX9-G-O0-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: v_readlane_b32 s6, v34, 6 +; GFX9-G-O0-NEXT: v_readlane_b32 s7, v34, 7 ; GFX9-G-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload @@ -1836,15 +1853,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_readlane_b32 s6, v34, 6 -; GFX9-G-O0-NEXT: v_readlane_b32 s7, v34, 7 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(18) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(16) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 1 @@ -1894,8 +1907,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s8 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, 
v1 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(10) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v28, v30 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v29, v31 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(8) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v32 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v25, v33 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v28 @@ -1915,6 +1930,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v14 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v15 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v13, s[8:9], v13, v4 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v12, s[8:9], v12, v9, s[8:9] ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v10, v7, s[8:9] @@ -2027,6 +2043,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: s_cbranch_execnz .LBB0_6 ; GFX9-G-O0-NEXT: s_branch .LBB0_1 ; GFX9-G-O0-NEXT: .LBB0_7: ; %udiv-preheader +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload @@ -2044,15 +2063,12 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-G-O0-NEXT: s_mov_b32 s4, 64 -; GFX9-G-O0-NEXT: 
s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v17 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v16 ; GFX9-G-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4 ; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v18, v4 @@ -2063,7 +2079,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v18, v6 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s6 ; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v18, v6 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v18, v[20:21] ; GFX9-G-O0-NEXT: v_lshrrev_b64 v[25:26], v18, v[22:23] ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[23:24], v5, v[20:21] @@ -2112,7 +2128,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], s[8:9] ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[8:9] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s8, 6 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s9, 7 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 @@ -2144,6 +2159,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_6 ; GFX9-G-O0-NEXT: .LBB0_8: ; %udiv-bb1 +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload @@ 
-2152,20 +2170,17 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 1 ; GFX9-G-O0-NEXT: s_mov_b32 s10, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s6 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v2, v4 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v5, v7, s[6:7] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s9 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v6, v7, s[6:7] @@ -2257,7 +2272,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], exec ; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] ; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(17) ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 4 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 5 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 @@ -2761,10 +2775,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_5 ; GFX9-O0-NEXT: .LBB1_3: ; %Flow2 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, 
s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] @@ -2772,8 +2782,13 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 2 ; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 3 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2820,6 +2835,13 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_3 ; GFX9-O0-NEXT: .LBB1_5: ; %Flow1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload ; 
GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload @@ -2828,15 +2850,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6 -; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7 -; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2850,6 +2866,12 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_branch .LBB1_4 ; GFX9-O0-NEXT: .LBB1_6: ; %udiv-do-while ; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 8 +; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 9 ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload 
@@ -2874,13 +2896,8 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 8 -; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 9 ; GFX9-O0-NEXT: s_mov_b32 s4, 63 +; GFX9-O0-NEXT: s_waitcnt vmcnt(16) ; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29 ; GFX9-O0-NEXT: s_mov_b32 s5, 1 @@ -2907,6 +2924,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29 +; GFX9-O0-NEXT: s_waitcnt vmcnt(10) ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27 ; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 @@ -2916,6 +2934,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 +; GFX9-O0-NEXT: s_waitcnt vmcnt(8) ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25 ; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 @@ -2927,10 +2946,12 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15 ; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; 
GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc @@ -3070,6 +3091,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_cbranch_execnz .LBB1_6 ; GFX9-O0-NEXT: s_branch .LBB1_1 ; GFX9-O0-NEXT: .LBB1_7: ; %udiv-preheader +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload @@ -3086,12 +3110,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: s_waitcnt vmcnt(10) +; GFX9-O0-NEXT: s_waitcnt vmcnt(9) ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 ; GFX9-O0-NEXT: s_mov_b32 s6, 64 @@ -3172,7 +3193,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(4) ; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 8 ; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 9 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 @@ -3204,6 +3224,9 @@ define 
i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_6 ; GFX9-O0-NEXT: .LBB1_8: ; %udiv-bb1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload @@ -3212,12 +3235,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1 ; GFX9-O0-NEXT: s_mov_b32 s5, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-O0-NEXT: s_mov_b32 s4, s7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 @@ -3228,7 +3248,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 ; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc @@ -3332,7 +3352,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec ; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] ; GFX9-O0-NEXT: s_xor_b64 
s[6:7], s[4:5], s[6:7] -; GFX9-O0-NEXT: s_waitcnt vmcnt(16) ; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6 ; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 @@ -3793,10 +3812,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_5 ; GFX9-G-O0-NEXT: .LBB1_3: ; %Flow2 -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] @@ -3804,10 +3819,17 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 0 ; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 1 ; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; 
GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_9 ; GFX9-G-O0-NEXT: .LBB1_4: ; %udiv-loop-exit @@ -3867,6 +3889,13 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_3 ; GFX9-G-O0-NEXT: .LBB1_5: ; %Flow1 +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 4 +; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 5 +; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload @@ -3875,17 +3904,13 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 4 -; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 5 -; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:152 ; 4-byte 
Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 @@ -3895,6 +3920,12 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: s_branch .LBB1_4 ; GFX9-G-O0-NEXT: .LBB1_6: ; %udiv-do-while ; GFX9-G-O0-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: v_readlane_b32 s6, v34, 6 +; GFX9-G-O0-NEXT: v_readlane_b32 s7, v34, 7 ; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload @@ -3919,15 +3950,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_readlane_b32 s6, v34, 6 -; GFX9-G-O0-NEXT: v_readlane_b32 s7, v34, 7 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(18) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3 +; 
GFX9-G-O0-NEXT: s_waitcnt vmcnt(16) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 1 @@ -3977,8 +4004,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s8 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(10) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v28, v30 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v29, v31 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(8) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v32 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v33 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v28 @@ -3998,6 +4027,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v12 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v11, s[8:9], v11, v4 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v10, v9, s[8:9] ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v8, v7, s[8:9] @@ -4118,6 +4148,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: s_cbranch_execnz .LBB1_6 ; GFX9-G-O0-NEXT: s_branch .LBB1_1 ; GFX9-G-O0-NEXT: .LBB1_7: ; %udiv-preheader +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload @@ -4135,14 +4168,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded 
Reload ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-G-O0-NEXT: s_mov_b32 s4, 64 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v5 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v4 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v7 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v6 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4 @@ -4208,7 +4238,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], s[8:9] ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[8:9] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s8, 6 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s9, 7 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 @@ -4240,6 +4269,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_6 ; GFX9-G-O0-NEXT: .LBB1_8: ; %udiv-bb1 +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload @@ -4248,20 +4280,17 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 
offset:24 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 1 ; GFX9-G-O0-NEXT: s_mov_b32 s10, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s6 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v1, v4 ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s10 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v3, v5, s[6:7] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s9 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v2, v3, s[6:7] @@ -4353,7 +4382,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], exec ; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] ; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(17) ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 4 ; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 5 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir index 585bfb4c58eae2b..001a72e36097683 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir @@ -258,31 +258,31 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0 ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed 
$sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0 ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0 ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0 ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -304,31 +304,31 @@ body: | 
; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__sgpr ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead 
$scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 %stack.0, $sgpr8, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -351,31 +351,31 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr4, 80, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc ; FLATSCRW64-NEXT: 
SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr4, 80, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -398,31 +398,31 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr7, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr7, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; 
FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr4, implicit-def $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr7, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr4, implicit-def $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr7, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 %stack.1, $sgpr8, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -702,31 +702,31 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; 
MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr4, 0, implicit-def $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr4, 0, implicit-def $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -795,31 +795,31 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = 
S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr4, 96, implicit-def $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr4, 96, implicit-def $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc ; FLATSCRW32-NEXT: 
SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -1104,31 +1104,31 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0 ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0 ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0 ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0 ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: renamable $sgpr4 = 
S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -1150,31 +1150,31 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8 ; ; MUBUFW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8 ; ; FLATSCRW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW64-NEXT: 
renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8 ; ; FLATSCRW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8 renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc SI_RETURN implicit $sgpr7, implicit $sgpr8 @@ -1462,3 +1462,50 @@ body: | SI_RETURN implicit $sgpr8 ... + +# Must use the result register as scratch register. +--- +name: s_add_i32_use_dst_reg_as_temp_regression +tracksRegLiveness: true +stack: + - { id: 0, size: 1, alignment: 4, local-offset: 0 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 + ; MUBUFW64-LABEL: name: s_add_i32_use_dst_reg_as_temp_regression + ; MUBUFW64: liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: renamable $vcc_hi = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; 
MUBUFW64-NEXT: renamable $vcc_hi = S_ADD_I32 killed $vcc_hi, renamable $vcc_lo, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $vcc_hi = COPY killed renamable $vcc_hi + ; MUBUFW64-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29 + ; + ; MUBUFW32-LABEL: name: s_add_i32_use_dst_reg_as_temp_regression + ; MUBUFW32: liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: renamable $vcc_hi = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $vcc_hi = S_ADD_I32 killed $vcc_hi, renamable $vcc_lo, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $vcc_hi = COPY killed renamable $vcc_hi + ; MUBUFW32-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, 
implicit $sgpr29 + ; + ; FLATSCRW64-LABEL: name: s_add_i32_use_dst_reg_as_temp_regression + ; FLATSCRW64: liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 + ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: renamable $vcc_hi = S_ADD_I32 killed $sgpr32, renamable $vcc_lo, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $vcc_hi = COPY killed renamable $vcc_hi + ; FLATSCRW64-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29 + ; + ; FLATSCRW32-LABEL: name: s_add_i32_use_dst_reg_as_temp_regression + ; FLATSCRW32: liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 + ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: renamable $vcc_hi = S_ADD_I32 killed $sgpr32, renamable $vcc_lo, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $vcc_hi = COPY killed renamable $vcc_hi + ; FLATSCRW32-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, 
implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29 + renamable $vcc_hi = S_ADD_I32 renamable $vcc_lo, %stack.0, implicit-def dead $scc + SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index f1f4abe580c0025..639b2ff25dcb86a 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -1600,8 +1600,14 @@ define amdgpu_kernel void @extract_neg_offset_vgpr(ptr addrspace(1) %out) { ; NOOPT-NEXT: ; implicit-def: $vgpr0 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1 +; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 +; NOOPT-NEXT: s_waitcnt expcnt(0) +; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[16:17] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_readlane_b32 s0, v31, 6 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 7 ; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:72 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload @@ -1612,26 +1618,14 @@ define amdgpu_kernel void @extract_neg_offset_vgpr(ptr addrspace(1) %out) { ; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword 
v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v31, 6 -; NOOPT-NEXT: v_readlane_b32 s1, v31, 7 ; NOOPT-NEXT: v_readfirstlane_b32 s2, v16 ; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v16 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] @@ -1657,7 +1651,6 @@ define amdgpu_kernel void @extract_neg_offset_vgpr(ptr addrspace(1) %out) { ; NOOPT-NEXT: v_readlane_b32 s1, v31, 5 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.3: -; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:76 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 ; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] @@ -1666,6 +1659,8 @@ define amdgpu_kernel void @extract_neg_offset_vgpr(ptr addrspace(1) %out) { ; NOOPT-NEXT: v_readlane_b32 s1, v31, 1 ; NOOPT-NEXT: v_readlane_b32 s2, v31, 2 ; NOOPT-NEXT: v_readlane_b32 s3, v31, 3 +; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:76 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; NOOPT-NEXT: s_endpgm ; @@ -4128,6 +4123,13 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB14_1: ; =>This Inner Loop Header: Depth=1 +; 
NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 +; NOOPT-NEXT: s_waitcnt expcnt(0) +; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[16:17] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_readlane_b32 s0, v31, 6 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 7 ; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload @@ -4153,12 +4155,7 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:136 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v31, 6 -; NOOPT-NEXT: v_readlane_b32 s1, v31, 7 ; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 ; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] @@ -4214,6 +4211,14 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: v_readlane_b32 s1, v31, 5 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.3: +; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 +; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[16:17] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_readlane_b32 s0, v31, 0 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 1 +; NOOPT-NEXT: v_readlane_b32 s2, v31, 2 +; NOOPT-NEXT: v_readlane_b32 s3, v31, 3 ; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:140 ; 4-byte Folded 
Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:144 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:148 ; 4-byte Folded Reload @@ -4230,26 +4235,22 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: buffer_load_dword v28, off, s[20:23], 0 offset:192 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v29, off, s[20:23], 0 offset:196 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v30, off, s[20:23], 0 offset:200 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v31, 0 -; NOOPT-NEXT: v_readlane_b32 s1, v31, 1 -; NOOPT-NEXT: v_readlane_b32 s2, v31, 2 -; NOOPT-NEXT: v_readlane_b32 s3, v31, 3 +; NOOPT-NEXT: s_waitcnt vmcnt(12) ; NOOPT-NEXT: v_mov_b32_e32 v4, v18 ; NOOPT-NEXT: v_mov_b32_e32 v5, v17 ; NOOPT-NEXT: v_mov_b32_e32 v6, v16 ; NOOPT-NEXT: v_mov_b32_e32 v0, v15 +; NOOPT-NEXT: s_waitcnt vmcnt(8) ; NOOPT-NEXT: v_mov_b32_e32 v1, v22 ; NOOPT-NEXT: v_mov_b32_e32 v2, v21 ; NOOPT-NEXT: v_mov_b32_e32 v3, v20 ; NOOPT-NEXT: v_mov_b32_e32 v7, v19 +; NOOPT-NEXT: s_waitcnt vmcnt(4) ; NOOPT-NEXT: v_mov_b32_e32 v12, v26 ; NOOPT-NEXT: v_mov_b32_e32 v13, v25 ; NOOPT-NEXT: v_mov_b32_e32 v14, v24 ; NOOPT-NEXT: v_mov_b32_e32 v8, v23 +; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_mov_b32_e32 v9, v30 ; NOOPT-NEXT: v_mov_b32_e32 v10, v29 ; NOOPT-NEXT: v_mov_b32_e32 v11, v28 @@ -4611,6 +4612,13 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 +; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 +; NOOPT-NEXT: s_waitcnt expcnt(0) +; NOOPT-NEXT: 
buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[16:17] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_readlane_b32 s0, v31, 6 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 7 ; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload @@ -4636,12 +4644,7 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:136 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v31, 6 -; NOOPT-NEXT: v_readlane_b32 s1, v31, 7 ; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 ; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] @@ -4697,6 +4700,14 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: v_readlane_b32 s1, v31, 5 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.3: +; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 +; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[16:17] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_readlane_b32 s0, v31, 0 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 1 +; NOOPT-NEXT: v_readlane_b32 s2, v31, 2 +; NOOPT-NEXT: v_readlane_b32 s3, v31, 3 ; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:140 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:144 ; 4-byte Folded Reload 
; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:148 ; 4-byte Folded Reload @@ -4713,26 +4724,22 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: buffer_load_dword v28, off, s[20:23], 0 offset:192 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v29, off, s[20:23], 0 offset:196 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v30, off, s[20:23], 0 offset:200 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v31, 0 -; NOOPT-NEXT: v_readlane_b32 s1, v31, 1 -; NOOPT-NEXT: v_readlane_b32 s2, v31, 2 -; NOOPT-NEXT: v_readlane_b32 s3, v31, 3 +; NOOPT-NEXT: s_waitcnt vmcnt(12) ; NOOPT-NEXT: v_mov_b32_e32 v4, v18 ; NOOPT-NEXT: v_mov_b32_e32 v5, v17 ; NOOPT-NEXT: v_mov_b32_e32 v6, v16 ; NOOPT-NEXT: v_mov_b32_e32 v0, v15 +; NOOPT-NEXT: s_waitcnt vmcnt(8) ; NOOPT-NEXT: v_mov_b32_e32 v1, v22 ; NOOPT-NEXT: v_mov_b32_e32 v2, v21 ; NOOPT-NEXT: v_mov_b32_e32 v3, v20 ; NOOPT-NEXT: v_mov_b32_e32 v7, v19 +; NOOPT-NEXT: s_waitcnt vmcnt(4) ; NOOPT-NEXT: v_mov_b32_e32 v12, v26 ; NOOPT-NEXT: v_mov_b32_e32 v13, v25 ; NOOPT-NEXT: v_mov_b32_e32 v14, v24 ; NOOPT-NEXT: v_mov_b32_e32 v8, v23 +; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_mov_b32_e32 v9, v30 ; NOOPT-NEXT: v_mov_b32_e32 v10, v29 ; NOOPT-NEXT: v_mov_b32_e32 v11, v28 @@ -5163,8 +5170,14 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1 ; NOOPT-NEXT: ; implicit-def: $vgpr0 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1 +; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 +; NOOPT-NEXT: s_waitcnt expcnt(0) +; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[28:29] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: 
v_readlane_b32 s0, v18, 23 +; NOOPT-NEXT: v_readlane_b32 s1, v18, 24 ; NOOPT-NEXT: buffer_load_dword v17, off, s[36:39], 0 offset:80 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:8 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:12 ; 4-byte Folded Reload @@ -5175,26 +5188,14 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1 ; NOOPT-NEXT: buffer_load_dword v7, off, s[36:39], 0 offset:32 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[36:39], 0 offset:36 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v9, off, s[36:39], 0 offset:40 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v10, off, s[36:39], 0 offset:44 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v11, off, s[36:39], 0 offset:48 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v12, off, s[36:39], 0 offset:52 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v13, off, s[36:39], 0 offset:56 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v14, off, s[36:39], 0 offset:60 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v15, off, s[36:39], 0 offset:64 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[36:39], 0 offset:72 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 -; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v18, 23 -; NOOPT-NEXT: v_readlane_b32 s1, v18, 24 ; NOOPT-NEXT: v_readfirstlane_b32 s2, v16 ; 
NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v16 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] @@ -5286,8 +5287,14 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1 ; NOOPT-NEXT: ; implicit-def: $vgpr0 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB16_4: ; =>This Inner Loop Header: Depth=1 +; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 +; NOOPT-NEXT: s_waitcnt expcnt(0) +; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[28:29] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_readlane_b32 s0, v18, 28 +; NOOPT-NEXT: v_readlane_b32 s1, v18, 29 ; NOOPT-NEXT: buffer_load_dword v17, off, s[36:39], 0 offset:152 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:88 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:92 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:96 ; 4-byte Folded Reload @@ -5298,26 +5305,14 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1 ; NOOPT-NEXT: buffer_load_dword v7, off, s[36:39], 0 offset:116 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[36:39], 0 offset:120 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v9, off, s[36:39], 0 offset:124 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v10, off, s[36:39], 0 offset:128 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v11, off, s[36:39], 0 offset:132 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v12, off, s[36:39], 0 offset:136 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v13, off, s[36:39], 0 offset:140 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v14, off, s[36:39], 0 offset:144 ; 
4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v15, off, s[36:39], 0 offset:148 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[36:39], 0 offset:68 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 -; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v18, 28 -; NOOPT-NEXT: v_readlane_b32 s1, v18, 29 ; NOOPT-NEXT: v_readfirstlane_b32 s2, v16 ; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v16 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] @@ -5343,9 +5338,6 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1 ; NOOPT-NEXT: v_readlane_b32 s1, v18, 27 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.6: -; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:76 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:156 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:84 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 ; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] @@ -5355,6 +5347,10 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1 ; NOOPT-NEXT: v_readlane_b32 s5, v18, 1 ; NOOPT-NEXT: v_readlane_b32 s6, v18, 2 ; NOOPT-NEXT: v_readlane_b32 s7, v18, 3 +; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:76 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:156 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:84 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: buffer_store_dword v2, off, s[4:7], 0 ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[4:7], 0 @@ -5903,6 +5899,13 @@ define 
amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1 +; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 +; NOOPT-NEXT: s_waitcnt expcnt(0) +; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[26:27] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_readlane_b32 s0, v32, 7 +; NOOPT-NEXT: v_readlane_b32 s1, v32, 8 ; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:8 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:12 ; 4-byte Folded Reload @@ -5928,12 +5931,7 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:72 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:80 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 -; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v32, 7 -; NOOPT-NEXT: v_readlane_b32 s1, v32, 8 ; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 ; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] @@ -5989,6 +5987,9 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: v_readlane_b32 s1, v32, 6 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.3: +; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 +; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: 
buffer_load_dword v0, off, s[28:31], 0 offset:88 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:92 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:96 ; 4-byte Folded Reload @@ -6005,13 +6006,10 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:140 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:144 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:148 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 -; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: v_mov_b32_e32 v16, 63 ; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:216 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[0:1], exec -; NOOPT-NEXT: s_waitcnt vmcnt(1) +; NOOPT-NEXT: s_waitcnt vmcnt(14) ; NOOPT-NEXT: v_writelane_b32 v32, s0, 9 ; NOOPT-NEXT: v_writelane_b32 v32, s1, 10 ; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 @@ -6020,21 +6018,32 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:152 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:156 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:160 ; 4-byte Folded Spill +; NOOPT-NEXT: s_waitcnt vmcnt(14) ; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:164 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:168 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:172 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:176 ; 4-byte Folded Spill +; NOOPT-NEXT: s_waitcnt vmcnt(14) ; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:180 ; 4-byte Folded 
Spill ; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:184 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:188 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:192 ; 4-byte Folded Spill +; NOOPT-NEXT: s_waitcnt vmcnt(14) ; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:196 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:200 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:204 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:208 ; 4-byte Folded Spill +; NOOPT-NEXT: s_waitcnt vmcnt(14) ; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:212 ; 4-byte Folded Spill ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB17_4: ; =>This Inner Loop Header: Depth=1 +; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 +; NOOPT-NEXT: s_waitcnt expcnt(0) +; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[26:27] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_readlane_b32 s0, v32, 11 +; NOOPT-NEXT: v_readlane_b32 s1, v32, 12 ; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:152 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:156 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:160 ; 4-byte Folded Reload @@ -6060,12 +6069,7 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:212 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:216 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:76 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 -; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; 
NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v32, 11 -; NOOPT-NEXT: v_readlane_b32 s1, v32, 12 ; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 ; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] @@ -6121,6 +6125,15 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: v_readlane_b32 s1, v32, 10 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.6: +; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 +; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[26:27] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_readlane_b32 s0, v32, 4 +; NOOPT-NEXT: v_readlane_b32 s4, v32, 0 +; NOOPT-NEXT: v_readlane_b32 s5, v32, 1 +; NOOPT-NEXT: v_readlane_b32 s6, v32, 2 +; NOOPT-NEXT: v_readlane_b32 s7, v32, 3 ; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:84 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:220 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:224 ; 4-byte Folded Reload @@ -6138,27 +6151,22 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_load_dword v29, off, s[28:31], 0 offset:272 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v30, off, s[28:31], 0 offset:276 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v31, off, s[28:31], 0 offset:280 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 -; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[26:27] -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v32, 4 -; NOOPT-NEXT: v_readlane_b32 s4, v32, 0 -; NOOPT-NEXT: v_readlane_b32 s5, v32, 1 -; NOOPT-NEXT: v_readlane_b32 s6, v32, 2 -; NOOPT-NEXT: v_readlane_b32 s7, v32, 3 +; NOOPT-NEXT: s_waitcnt vmcnt(12) ; NOOPT-NEXT: v_mov_b32_e32 v5, v19 ; NOOPT-NEXT: v_mov_b32_e32 v6, 
v18 ; NOOPT-NEXT: v_mov_b32_e32 v7, v17 ; NOOPT-NEXT: v_mov_b32_e32 v1, v16 +; NOOPT-NEXT: s_waitcnt vmcnt(8) ; NOOPT-NEXT: v_mov_b32_e32 v2, v23 ; NOOPT-NEXT: v_mov_b32_e32 v3, v22 ; NOOPT-NEXT: v_mov_b32_e32 v4, v21 ; NOOPT-NEXT: v_mov_b32_e32 v8, v20 +; NOOPT-NEXT: s_waitcnt vmcnt(4) ; NOOPT-NEXT: v_mov_b32_e32 v13, v27 ; NOOPT-NEXT: v_mov_b32_e32 v14, v26 ; NOOPT-NEXT: v_mov_b32_e32 v15, v25 ; NOOPT-NEXT: v_mov_b32_e32 v9, v24 +; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_mov_b32_e32 v10, v31 ; NOOPT-NEXT: v_mov_b32_e32 v11, v30 ; NOOPT-NEXT: v_mov_b32_e32 v12, v29 @@ -9071,15 +9079,17 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: .LBB26_1: ; %bb2 ; NOOPT-NEXT: ; =>This Loop Header: Depth=1 ; NOOPT-NEXT: ; Child Loop BB26_3 Depth 2 -; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s2, v18, 0 +; NOOPT-NEXT: s_waitcnt expcnt(0) +; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 s[0:1], -1 ; NOOPT-NEXT: ; implicit-def: $sgpr4 +; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_cmp_ge_i32_e64 s[2:3], v0, s2 ; NOOPT-NEXT: v_mov_b32_e32 v0, s4 ; NOOPT-NEXT: s_and_b64 vcc, exec, s[2:3] @@ -9156,6 +9166,13 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB26_3: ; Parent Loop BB26_1 Depth=1 ; NOOPT-NEXT: ; => This Inner Loop Header: Depth=2 +; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 +; NOOPT-NEXT: s_waitcnt expcnt(0) +; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[20:21] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; 
NOOPT-NEXT: v_readlane_b32 s0, v18, 6 +; NOOPT-NEXT: v_readlane_b32 s1, v18, 7 ; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:12 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:16 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:20 ; 4-byte Folded Reload @@ -9181,12 +9198,7 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:72 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[24:27], 0 offset:76 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[24:27], 0 offset:80 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 -; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v18, 6 -; NOOPT-NEXT: v_readlane_b32 s1, v18, 7 ; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 ; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] @@ -9242,6 +9254,9 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: v_readlane_b32 s1, v18, 5 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.5: ; in Loop: Header=BB26_1 Depth=1 +; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 +; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:84 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:88 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:92 ; 4-byte Folded Reload @@ -9258,13 +9273,9 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: buffer_load_dword v13, off, s[24:27], 0 offset:136 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v14, off, s[24:27], 0 offset:140 ; 4-byte Folded 
Reload ; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:144 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 -; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: s_mov_b64 s[0:1], 0 ; NOOPT-NEXT: s_waitcnt vmcnt(14) ; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt vmcnt(1) ; NOOPT-NEXT: v_writelane_b32 v18, s0, 2 ; NOOPT-NEXT: v_writelane_b32 v18, s1, 3 ; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 @@ -9272,8 +9283,6 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: .LBB26_6: ; %Flow ; NOOPT-NEXT: ; in Loop: Header=BB26_1 Depth=1 -; NOOPT-NEXT: s_waitcnt expcnt(1) -; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:8 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload @@ -9281,11 +9290,13 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v18, 2 ; NOOPT-NEXT: v_readlane_b32 s1, v18, 3 +; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:8 ; 4-byte Folded Reload ; NOOPT-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] ; NOOPT-NEXT: s_mov_b32 s0, 1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: v_cmp_ne_u32_e64 s[0:1], v1, s0 ; NOOPT-NEXT: s_and_b64 vcc, exec, s[0:1] +; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: s_cbranch_vccnz .LBB26_1 ; NOOPT-NEXT: ; %bb.7: ; %bb8 @@ -9645,6 +9656,13 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB27_1: ; =>This 
Inner Loop Header: Depth=1 +; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 +; NOOPT-NEXT: s_waitcnt expcnt(0) +; NOOPT-NEXT: buffer_load_dword v33, off, s[16:19], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[12:13] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_readlane_b32 s0, v33, 9 +; NOOPT-NEXT: v_readlane_b32 s1, v33, 10 ; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:12 ; 4-byte Folded Reload @@ -9670,12 +9688,7 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: buffer_load_dword v15, off, s[16:19], 0 offset:64 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[16:19], 0 offset:144 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[16:19], 0 offset:132 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 -; NOOPT-NEXT: buffer_load_dword v33, off, s[16:19], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[12:13] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v33, 9 -; NOOPT-NEXT: v_readlane_b32 s1, v33, 10 ; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 ; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] @@ -9731,6 +9744,14 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: v_readlane_b32 s1, v33, 8 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.3: +; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 +; NOOPT-NEXT: buffer_load_dword v33, off, s[16:19], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[12:13] +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_readlane_b32 s0, v33, 3 +; NOOPT-NEXT: v_readlane_b32 s1, v33, 4 +; NOOPT-NEXT: v_readlane_b32 s2, v33, 5 +; NOOPT-NEXT: v_readlane_b32 s3, v33, 6 ; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 
0 offset:136 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v5, off, s[16:19], 0 offset:140 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[16:19], 0 offset:148 ; 4-byte Folded Reload @@ -9749,26 +9770,22 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: buffer_load_dword v30, off, s[16:19], 0 offset:200 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v31, off, s[16:19], 0 offset:204 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v32, off, s[16:19], 0 offset:208 ; 4-byte Folded Reload -; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 -; NOOPT-NEXT: buffer_load_dword v33, off, s[16:19], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[12:13] -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v33, 3 -; NOOPT-NEXT: v_readlane_b32 s1, v33, 4 -; NOOPT-NEXT: v_readlane_b32 s2, v33, 5 -; NOOPT-NEXT: v_readlane_b32 s3, v33, 6 +; NOOPT-NEXT: s_waitcnt vmcnt(12) ; NOOPT-NEXT: v_mov_b32_e32 v6, v20 ; NOOPT-NEXT: v_mov_b32_e32 v7, v19 ; NOOPT-NEXT: v_mov_b32_e32 v8, v18 ; NOOPT-NEXT: v_mov_b32_e32 v0, v17 +; NOOPT-NEXT: s_waitcnt vmcnt(8) ; NOOPT-NEXT: v_mov_b32_e32 v1, v24 ; NOOPT-NEXT: v_mov_b32_e32 v2, v23 ; NOOPT-NEXT: v_mov_b32_e32 v3, v22 ; NOOPT-NEXT: v_mov_b32_e32 v9, v21 +; NOOPT-NEXT: s_waitcnt vmcnt(4) ; NOOPT-NEXT: v_mov_b32_e32 v14, v28 ; NOOPT-NEXT: v_mov_b32_e32 v15, v27 ; NOOPT-NEXT: v_mov_b32_e32 v16, v26 ; NOOPT-NEXT: v_mov_b32_e32 v10, v25 +; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_mov_b32_e32 v11, v32 ; NOOPT-NEXT: v_mov_b32_e32 v12, v31 ; NOOPT-NEXT: v_mov_b32_e32 v13, v30 diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll index f6a77a763c2cde9..904fb974e3d7003 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -161,8 +161,8 @@ define void 
@func_local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; FLATSCR-NEXT: s_mov_b32 s0, 0 ; FLATSCR-NEXT: .LBB1_1: ; %loadstoreloop ; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 -; FLATSCR-NEXT: s_add_i32 s3, s33, s0 -; FLATSCR-NEXT: s_add_i32 s1, s3, 0x3000 +; FLATSCR-NEXT: s_add_i32 s1, s33, s0 +; FLATSCR-NEXT: s_addk_i32 s1, 0x3000 ; FLATSCR-NEXT: s_add_i32 s0, s0, 1 ; FLATSCR-NEXT: s_cmpk_lt_u32 s0, 0x2120 ; FLATSCR-NEXT: scratch_store_byte off, v2, s1 diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll index 76a31a7fac8c1ab..e44803d611f84b3 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll @@ -171,23 +171,23 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: s_waitcnt vmcnt(4) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: s_waitcnt vmcnt(2) ; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; 
W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 ; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 @@ -197,7 +197,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_writelane_b32 v7, s8, 3 ; W64-O0-NEXT: v_writelane_b32 v7, s9, 4 ; W64-O0-NEXT: v_writelane_b32 v7, s10, 5 @@ -209,7 +208,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -221,14 +219,15 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W64-O0-NEXT: v_readlane_b32 s10, v7, 5 ; W64-O0-NEXT: v_readlane_b32 s11, v7, 6 ; W64-O0-NEXT: v_readlane_b32 s6, v7, 0 -; W64-O0-NEXT: s_nop 4 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 2 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB0_1 ; W64-O0-NEXT: ; %bb.3: -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; 
W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -236,6 +235,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W64-O0-NEXT: v_readlane_b32 s4, v7, 1 ; W64-O0-NEXT: v_readlane_b32 s5, v7, 2 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] @@ -563,23 +563,23 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: s_waitcnt vmcnt(4) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: s_waitcnt vmcnt(2) ; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 ; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s7, 
v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 @@ -589,7 +589,6 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_writelane_b32 v17, s8, 3 ; W64-O0-NEXT: v_writelane_b32 v17, s9, 4 ; W64-O0-NEXT: v_writelane_b32 v17, s10, 5 @@ -601,7 +600,6 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB1_1 Depth=1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -613,7 +611,9 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; W64-O0-NEXT: v_readlane_b32 s10, v17, 5 ; W64-O0-NEXT: v_readlane_b32 s11, v17, 6 ; W64-O0-NEXT: v_readlane_b32 s6, v17, 0 -; W64-O0-NEXT: s_nop 4 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 2 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill @@ -634,23 +634,23 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB1_4: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: 
s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: s_waitcnt vmcnt(4) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: s_waitcnt vmcnt(2) ; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 ; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 @@ -660,7 +660,6 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_writelane_b32 v17, s8, 11 ; W64-O0-NEXT: v_writelane_b32 v17, s9, 12 ; W64-O0-NEXT: v_writelane_b32 v17, s10, 13 @@ -672,7 +671,6 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.5: ; in Loop: Header=BB1_4 Depth=1 -; W64-O0-NEXT: buffer_load_dword v0, off, 
s[0:3], s32 offset:68 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -684,19 +682,15 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; W64-O0-NEXT: v_readlane_b32 s10, v17, 13 ; W64-O0-NEXT: v_readlane_b32 s11, v17, 14 ; W64-O0-NEXT: v_readlane_b32 s6, v17, 0 -; W64-O0-NEXT: s_nop 4 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 2 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB1_4 ; W64-O0-NEXT: ; %bb.6: -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -704,6 +698,13 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; W64-O0-NEXT: v_readlane_b32 s4, v17, 9 ; W64-O0-NEXT: v_readlane_b32 s5, v17, 10 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; 
W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: global_store_dword v[3:4], v5, off ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: global_store_dword v[0:1], v2, off @@ -1084,23 +1085,23 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: s_waitcnt vmcnt(4) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: s_waitcnt vmcnt(2) ; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 ; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; 
W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 @@ -1110,7 +1111,6 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_writelane_b32 v13, s8, 4 ; W64-O0-NEXT: v_writelane_b32 v13, s9, 5 ; W64-O0-NEXT: v_writelane_b32 v13, s10, 6 @@ -1122,7 +1122,6 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB2_1 Depth=1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -1134,15 +1133,15 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; W64-O0-NEXT: v_readlane_b32 s10, v13, 6 ; W64-O0-NEXT: v_readlane_b32 s11, v13, 7 ; W64-O0-NEXT: v_readlane_b32 s6, v13, 1 -; W64-O0-NEXT: s_nop 4 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 2 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB2_1 ; W64-O0-NEXT: ; %bb.3: -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 
; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -1151,7 +1150,10 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; W64-O0-NEXT: v_readlane_b32 s7, v13, 3 ; W64-O0-NEXT: s_mov_b64 exec, s[6:7] ; W64-O0-NEXT: v_readlane_b32 s4, v13, 1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b32 s5, 0x3ff +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_and_b32_e64 v1, v1, s5 ; W64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, s4 ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill @@ -1182,23 +1184,23 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB2_5: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: s_waitcnt vmcnt(4) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: s_waitcnt vmcnt(2) ; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 ; W64-O0-NEXT: 
v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 @@ -1208,7 +1210,6 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_writelane_b32 v13, s8, 15 ; W64-O0-NEXT: v_writelane_b32 v13, s9, 16 ; W64-O0-NEXT: v_writelane_b32 v13, s10, 17 @@ -1220,7 +1221,6 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.6: ; in Loop: Header=BB2_5 Depth=1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -1232,14 +1232,15 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; W64-O0-NEXT: v_readlane_b32 s10, v13, 17 ; W64-O0-NEXT: v_readlane_b32 s11, v13, 18 ; W64-O0-NEXT: v_readlane_b32 s6, v13, 12 -; W64-O0-NEXT: s_nop 4 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 2 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB2_5 ; W64-O0-NEXT: ; %bb.7: -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 
4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -1247,12 +1248,10 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; W64-O0-NEXT: v_readlane_b32 s4, v13, 13 ; W64-O0-NEXT: v_readlane_b32 s5, v13, 14 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill ; W64-O0-NEXT: .LBB2_8: ; %bb2 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -1260,6 +1259,10 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; W64-O0-NEXT: v_readlane_b32 s4, v13, 10 ; W64-O0-NEXT: v_readlane_b32 s5, v13, 11 ; W64-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: global_store_dword v[0:1], v2, off ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll index 59ceecbf43b7856..896cb6042e810bc 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ 
b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -182,23 +182,23 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: s_waitcnt vmcnt(4) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: s_waitcnt vmcnt(2) ; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 ; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 @@ -208,7 +208,6 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_writelane_b32 v7, s8, 3 ; W64-O0-NEXT: v_writelane_b32 v7, s9, 4 ; W64-O0-NEXT: v_writelane_b32 v7, s10, 5 @@ -220,7 +219,6 @@ 
define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -232,14 +230,15 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; W64-O0-NEXT: v_readlane_b32 s10, v7, 5 ; W64-O0-NEXT: v_readlane_b32 s11, v7, 6 ; W64-O0-NEXT: v_readlane_b32 s6, v7, 0 -; W64-O0-NEXT: s_nop 4 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 2 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB0_1 ; W64-O0-NEXT: ; %bb.3: -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -247,6 +246,7 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; W64-O0-NEXT: v_readlane_b32 s4, v7, 1 ; W64-O0-NEXT: v_readlane_b32 s5, v7, 2 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] @@ -598,23 +598,23 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; W64-O0-NEXT: buffer_store_dword v17, off, 
s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: s_waitcnt vmcnt(4) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: s_waitcnt vmcnt(2) ; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 ; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 @@ -624,7 +624,6 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_writelane_b32 v17, s8, 3 ; W64-O0-NEXT: v_writelane_b32 v17, s9, 4 ; W64-O0-NEXT: v_writelane_b32 v17, s10, 5 @@ -636,7 +635,6 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded 
Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB1_1 Depth=1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -648,7 +646,9 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; W64-O0-NEXT: v_readlane_b32 s10, v17, 5 ; W64-O0-NEXT: v_readlane_b32 s11, v17, 6 ; W64-O0-NEXT: v_readlane_b32 s6, v17, 0 -; W64-O0-NEXT: s_nop 4 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 2 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill @@ -669,23 +669,23 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB1_4: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: s_waitcnt vmcnt(4) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt 
vmcnt(3) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: s_waitcnt vmcnt(2) ; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 ; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 @@ -695,7 +695,6 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_writelane_b32 v17, s8, 11 ; W64-O0-NEXT: v_writelane_b32 v17, s9, 12 ; W64-O0-NEXT: v_writelane_b32 v17, s10, 13 @@ -707,7 +706,6 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.5: ; in Loop: Header=BB1_4 Depth=1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -719,19 +717,15 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; W64-O0-NEXT: v_readlane_b32 s10, v17, 13 ; W64-O0-NEXT: v_readlane_b32 s11, v17, 14 ; W64-O0-NEXT: v_readlane_b32 s6, v17, 0 -; W64-O0-NEXT: s_nop 4 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 2 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 
; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB1_4 ; W64-O0-NEXT: ; %bb.6: -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -739,6 +733,13 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; W64-O0-NEXT: v_readlane_b32 s4, v17, 9 ; W64-O0-NEXT: v_readlane_b32 s5, v17, 10 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: global_store_dword v[3:4], v5, off ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: global_store_dword v[0:1], v2, off @@ -1135,23 +1136,23 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB2_1: ; =>This Inner 
Loop Header: Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: s_waitcnt vmcnt(4) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: s_waitcnt vmcnt(2) ; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 ; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 @@ -1161,7 +1162,6 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_writelane_b32 v13, s8, 4 ; W64-O0-NEXT: v_writelane_b32 v13, s9, 5 ; W64-O0-NEXT: v_writelane_b32 v13, s10, 6 @@ -1173,7 +1173,6 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB2_1 Depth=1 -; 
W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -1185,15 +1184,15 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0-NEXT: v_readlane_b32 s10, v13, 6 ; W64-O0-NEXT: v_readlane_b32 s11, v13, 7 ; W64-O0-NEXT: v_readlane_b32 s6, v13, 1 -; W64-O0-NEXT: s_nop 4 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 2 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB2_1 ; W64-O0-NEXT: ; %bb.3: -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -1202,7 +1201,10 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0-NEXT: v_readlane_b32 s7, v13, 3 ; W64-O0-NEXT: s_mov_b64 exec, s[6:7] ; W64-O0-NEXT: v_readlane_b32 s4, v13, 1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b32 s5, 0x3ff +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_and_b32_e64 v1, v1, s5 ; W64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, s4 ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill @@ -1216,15 +1218,16 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) 
%i, ptr addrspace(8) %j, ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execz .LBB2_8 ; W64-O0-NEXT: ; %bb.4: ; %bb1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_readlane_b32 s4, v13, 0 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_mov_b32_e32 v6, v4 ; W64-O0-NEXT: v_mov_b32_e32 v0, v3 ; W64-O0-NEXT: v_mov_b32_e32 v4, v2 @@ -1253,23 +1256,23 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB2_5: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload ; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:92 ; 
4-byte Folded Reload -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: s_waitcnt vmcnt(4) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 +; W64-O0-NEXT: s_waitcnt vmcnt(2) ; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 ; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 @@ -1279,7 +1282,6 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: v_writelane_b32 v13, s8, 15 ; W64-O0-NEXT: v_writelane_b32 v13, s9, 16 ; W64-O0-NEXT: v_writelane_b32 v13, s10, 17 @@ -1291,7 +1293,6 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.6: ; in Loop: Header=BB2_5 Depth=1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -1303,14 +1304,15 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0-NEXT: v_readlane_b32 s10, v13, 17 ; W64-O0-NEXT: v_readlane_b32 s11, v13, 18 ; W64-O0-NEXT: v_readlane_b32 s6, v13, 12 -; W64-O0-NEXT: s_nop 4 +; 
W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: s_nop 2 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB2_5 ; W64-O0-NEXT: ; %bb.7: -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -1318,12 +1320,10 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0-NEXT: v_readlane_b32 s4, v13, 13 ; W64-O0-NEXT: v_readlane_b32 s5, v13, 14 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; W64-O0-NEXT: .LBB2_8: ; %bb2 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 ; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] @@ -1331,6 +1331,10 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0-NEXT: v_readlane_b32 s4, v13, 10 ; W64-O0-NEXT: v_readlane_b32 s5, v13, 11 ; W64-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte 
Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: global_store_dword v[0:1], v2, off ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll index f7f5bd56fa6f16e..6583d5e8aa5a07d 100644 --- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll @@ -622,10 +622,6 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_5 ; GFX9-O0-NEXT: .LBB0_3: ; %Flow2 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 ; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] @@ -633,8 +629,13 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4 ; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: 
buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -681,6 +682,13 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_3 ; GFX9-O0-NEXT: .LBB0_5: ; %Flow1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 8 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 9 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload @@ -689,15 +697,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 8 -; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 9 -; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill ; GFX9-O0-NEXT: 
buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -711,6 +713,12 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_branch .LBB0_4 ; GFX9-O0-NEXT: .LBB0_6: ; %udiv-do-while ; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 10 +; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 11 ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload @@ -735,13 +743,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 10 -; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 11 ; GFX9-O0-NEXT: s_mov_b32 s4, 63 +; GFX9-O0-NEXT: s_waitcnt vmcnt(16) ; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29 ; GFX9-O0-NEXT: s_mov_b32 s5, 1 @@ -768,6 +771,7 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29 +; GFX9-O0-NEXT: s_waitcnt vmcnt(10) ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27 ; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10 
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 @@ -777,6 +781,7 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 +; GFX9-O0-NEXT: s_waitcnt vmcnt(8) ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25 ; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 @@ -788,10 +793,12 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15 ; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc @@ -931,6 +938,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_cbranch_execnz .LBB0_6 ; GFX9-O0-NEXT: s_branch .LBB0_1 ; GFX9-O0-NEXT: .LBB0_7: ; %udiv-preheader +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload @@ -947,12 +957,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX9-O0-NEXT: 
s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: s_waitcnt vmcnt(10) +; GFX9-O0-NEXT: s_waitcnt vmcnt(9) ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 ; GFX9-O0-NEXT: s_mov_b32 s6, 64 @@ -1033,7 +1040,6 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(4) ; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 10 ; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 11 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 @@ -1065,6 +1071,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_6 ; GFX9-O0-NEXT: .LBB0_8: ; %udiv-bb1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload @@ -1073,12 +1082,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; 
GFX9-O0-NEXT: s_mov_b64 s[6:7], 1 ; GFX9-O0-NEXT: s_mov_b32 s5, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-O0-NEXT: s_mov_b32 s4, s7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 @@ -1089,7 +1095,7 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 ; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc @@ -1193,7 +1199,6 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec ; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] ; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-O0-NEXT: s_waitcnt vmcnt(16) ; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8 ; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 @@ -1999,10 +2004,6 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_5 ; GFX9-O0-NEXT: .LBB1_3: ; %Flow2 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] @@ -2010,8 +2011,13 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 2 ; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 3 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; 
GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2058,6 +2064,13 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_3 ; GFX9-O0-NEXT: .LBB1_5: ; %Flow1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload @@ -2066,15 +2079,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword 
v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6 -; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7 -; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2088,6 +2095,12 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_branch .LBB1_4 ; GFX9-O0-NEXT: .LBB1_6: ; %udiv-do-while ; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 8 +; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 9 ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload @@ -2112,13 +2125,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 8 -; 
GFX9-O0-NEXT: v_readlane_b32 s7, v30, 9 ; GFX9-O0-NEXT: s_mov_b32 s4, 63 +; GFX9-O0-NEXT: s_waitcnt vmcnt(16) ; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29 ; GFX9-O0-NEXT: s_mov_b32 s5, 1 @@ -2145,6 +2153,7 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29 +; GFX9-O0-NEXT: s_waitcnt vmcnt(10) ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27 ; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 @@ -2154,6 +2163,7 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 +; GFX9-O0-NEXT: s_waitcnt vmcnt(8) ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25 ; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 @@ -2165,10 +2175,12 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15 ; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6 +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc @@ -2308,6 +2320,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_cbranch_execnz .LBB1_6 ; GFX9-O0-NEXT: s_branch .LBB1_1 ; GFX9-O0-NEXT: .LBB1_7: ; %udiv-preheader +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:296 ; 4-byte 
Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload @@ -2324,12 +2339,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: s_waitcnt vmcnt(10) +; GFX9-O0-NEXT: s_waitcnt vmcnt(9) ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 ; GFX9-O0-NEXT: s_mov_b32 s6, 64 @@ -2410,7 +2422,6 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(4) ; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 8 ; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 9 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 @@ -2442,6 +2453,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_6 ; GFX9-O0-NEXT: .LBB1_8: ; %udiv-bb1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 
offset:44 ; 4-byte Folded Reload @@ -2450,12 +2464,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1 ; GFX9-O0-NEXT: s_mov_b32 s5, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-O0-NEXT: s_mov_b32 s4, s7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 @@ -2466,7 +2477,7 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 ; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc @@ -2570,7 +2581,6 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec ; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] ; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-O0-NEXT: s_waitcnt vmcnt(16) ; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6 ; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll index 4dfd4c095c87a03..0daa6860072616e 100644 --- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll +++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll @@ -367,14 +367,15 @@ define amdgpu_kernel void @trap_with_use_after(ptr addrspace(1) %arg0, ptr addrs ; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off offset:4 ; 4-byte Folded 
Spill ; HSA-TRAP-GFX1100-O0-NEXT: s_cbranch_execnz .LBB2_2 ; HSA-TRAP-GFX1100-O0-NEXT: ; %bb.1: -; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v0, off, off offset:8 ; 4-byte Folded Reload -; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v1, off, off offset:4 ; 4-byte Folded Reload ; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1 ; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload ; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6 ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0) ; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s0, v2, 0 ; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s1, v2, 1 +; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v0, off, off offset:8 ; 4-byte Folded Reload +; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v1, off, off offset:4 ; 4-byte Folded Reload +; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0) ; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v0, v1, s[0:1] dlc ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0 ; HSA-TRAP-GFX1100-O0-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll index a827ebe96cfcf4c..2c9b53b46c098e8 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll @@ -35,7 +35,6 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() { ; CHECK-NEXT: s_cbranch_execz .LBB0_2 ; CHECK-NEXT: ; %bb.1: ; %bb193 ; CHECK-NEXT: .LBB0_2: ; %bb194 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], 0 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[8:9] @@ -43,7 +42,9 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() { ; CHECK-NEXT: v_readlane_b32 s4, v3, 0 ; CHECK-NEXT: v_readlane_b32 s5, v3, 1 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 
; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 s4, 0xffff +; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_and_b32_e64 v0, s4, v0 ; CHECK-NEXT: s_mov_b32 s4, 0 ; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, s4 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll index 025381d5c16df8b..1089093ea691c31 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -206,9 +206,6 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: .LBB1_2: ; %merge -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1 ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47] @@ -220,6 +217,9 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) ; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 1 ; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 2 ; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 3 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v0, v3 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[36:37] ; GFX9-O0-NEXT: s_mov_b32 s36, 1 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll index 312628c7b5451eb..027081752a11bb7 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -183,9 +183,6 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { ; 
GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: .LBB1_2: ; %merge -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[16:19], 0 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] @@ -197,6 +194,9 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: v_readlane_b32 s3, v5, 2 ; GFX9-O0-NEXT: v_readlane_b32 s0, v5, 3 ; GFX9-O0-NEXT: v_readlane_b32 s1, v5, 4 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v3 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GFX9-O0-NEXT: s_mov_b32 s4, 1 @@ -1035,9 +1035,6 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: .LBB8_2: ; %merge -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[16:19], 0 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] @@ -1049,6 +1046,9 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: v_readlane_b32 s3, v5, 2 ; GFX9-O0-NEXT: v_readlane_b32 s0, v5, 3 ; GFX9-O0-NEXT: v_readlane_b32 s1, v5, 4 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:8 ; 
4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v3 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GFX9-O0-NEXT: s_mov_b32 s4, 1 diff --git a/llvm/test/CodeGen/DirectX/radians.ll b/llvm/test/CodeGen/DirectX/radians.ll index 73ec013775c3e94..f31585cead3766e 100644 --- a/llvm/test/CodeGen/DirectX/radians.ll +++ b/llvm/test/CodeGen/DirectX/radians.ll @@ -44,9 +44,9 @@ define noundef <4 x half> @radians_half_vector(<4 x half> noundef %a) { ; CHECK: [[ee3:%.*]] = extractelement <4 x half> [[A]], i64 3 ; CHECK: [[ie3:%.*]] = fmul half [[ee3]], 0xH2478 ; CHECK: [[TMP0:%.*]] = insertelement <4 x half> poison, half [[ie0]], i64 0 -; CHECK: [[TMP1:%.*]] = insertelement <4 x half> %[[TMP0]], half [[ie1]], i64 1 -; CHECK: [[TMP2:%.*]] = insertelement <4 x half> %[[TMP1]], half [[ie2]], i64 2 -; CHECK: [[TMP3:%.*]] = insertelement <4 x half> %[[TMP2]], half [[ie3]], i64 3 +; CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[ie1]], i64 1 +; CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[ie2]], i64 2 +; CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[ie3]], i64 3 ; CHECK: ret <4 x half> [[TMP3]] ; entry: @@ -67,13 +67,12 @@ define noundef <4 x float> @radians_float_vector(<4 x float> noundef %a) { ; CHECK: [[ee3:%.*]] = extractelement <4 x float> [[A]], i64 3 ; CHECK: [[ie3:%.*]] = fmul float [[ee3]], 0x3F91DF46A0000000 ; CHECK: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[ie0]], i64 0 -; CHECK: [[TMP1:%.*]] = insertelement <4 x float> %[[TMP0]], float [[ie1]], i64 1 -; CHECK: [[TMP2:%.*]] = insertelement <4 x float> %[[TMP1]], float [[ie2]], i64 2 -; CHECK: [[TMP3:%.*]] = insertelement <4 x float> %[[TMP2]], float [[ie3]], i64 3 +; CHECK: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[ie1]], i64 1 +; CHECK: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 
[[ie2]], i64 2 +; CHECK: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[ie3]], i64 3 ; CHECK: ret <4 x float> [[TMP3]] ; entry: %elt.radians = call <4 x float> @llvm.dx.radians.v4f32(<4 x float> %a) ret <4 x float> %elt.radians } - diff --git a/llvm/test/CodeGen/Generic/llc-start-stop.ll b/llvm/test/CodeGen/Generic/llc-start-stop.ll index b02472473a00cbc..942c5d0020f27a6 100644 --- a/llvm/test/CodeGen/Generic/llc-start-stop.ll +++ b/llvm/test/CodeGen/Generic/llc-start-stop.ll @@ -27,7 +27,7 @@ ; START-BEFORE: -machine-branch-prob -regalloc-evict -regalloc-priority -domtree ; START-BEFORE: FunctionPass Manager ; START-BEFORE: Loop Strength Reduction -; START-BEFORE-NEXT: Basic Alias Analysis (stateless AA impl) +; START-BEFORE-NEXT: {{Loop Terminator Folding|Basic Alias Analysis \(stateless AA impl\)}} ; RUN: not --crash llc < %s -start-before=nonexistent -o /dev/null 2>&1 | FileCheck %s -check-prefix=NONEXISTENT-START-BEFORE ; RUN: not --crash llc < %s -stop-before=nonexistent -o /dev/null 2>&1 | FileCheck %s -check-prefix=NONEXISTENT-STOP-BEFORE diff --git a/llvm/test/CodeGen/SPIRV/debug-info/debug-type-pointer.ll b/llvm/test/CodeGen/SPIRV/debug-info/debug-type-pointer.ll new file mode 100644 index 000000000000000..b7e6e95f366cf7c --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/debug-info/debug-type-pointer.ll @@ -0,0 +1,280 @@ +; RUN: llc --verify-machineinstrs --spv-emit-nonsemantic-debug-info --spirv-ext=+SPV_KHR_non_semantic_info --print-after=spirv-nonsemantic-debug-info -O0 -mtriple=spirv64-unknown-unknown %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-MIR +; RUN: llc --verify-machineinstrs --spv-emit-nonsemantic-debug-info --spirv-ext=+SPV_KHR_non_semantic_info -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: llc --verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_non_semantic_info %s -o - | FileCheck %s --check-prefix=CHECK-OPTION +; TODO(#109287): When type is void * the 
spirv-val raises an error when DebugInfoNone is set as Base Type argument of DebugTypePointer. +; DISABLED: %if spirv-tools %{ llc --verify-machineinstrs --spv-emit-nonsemantic-debug-info --spirv-ext=+SPV_KHR_non_semantic_info -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-MIR-DAG: [[i32type:%[0-9]+\:type]] = OpTypeInt 32, 0 +; CHECK-MIR-DAG: [[void_type:%[0-9]+\:type\(s64\)]] = OpTypeVoid +; CHECK-MIR-DAG: [[i32_8:%[0-9]+\:iid]] = OpConstantI [[i32type]], 8 +; CHECK-MIR-DAG: [[i32_0:%[0-9]+\:iid]] = OpConstantI [[i32type]], 0 +; CHECK-MIR-DAG: [[i32_5:%[0-9]+\:iid\(s32\)]] = OpConstantI [[i32type]], 5 +; CHECK-MIR-DAG: [[enc_float:%[0-9]+\:iid\(s32\)]] = OpConstantI [[i32type]], 3 +; CHECK-MIR-DAG: [[enc_boolean:%[0-9]+\:iid\(s32\)]] = OpConstantI [[i32type]], 2 +; CHECK-MIR-DAG: [[bool:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_8]], [[enc_boolean]], [[i32_0]] +; CHECK-MIR-DAG: [[i32_16:%[0-9]+\:iid\(s32\)]] = OpConstantI [[i32type]], 16 +; CHECK-MIR-DAG: [[enc_signed:%[0-9]+\:iid\(s32\)]] = OpConstantI [[i32type]], 4 +; CHECK-MIR-DAG: [[short:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_16]], [[enc_signed]], [[i32_0]] +; CHECK-MIR-DAG: [[char:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_8]], [[i32_5]], [[i32_0]] +; CHECK-MIR-DAG: [[i32_64:%[0-9]+\:iid\(s32\)]] = OpConstantI [[i32type]], 64 +; CHECK-MIR-DAG: [[long:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_64]], [[enc_signed]], [[i32_0]] +; CHECK-MIR-DAG: [[i32_32:%[0-9]+\:iid\(s32\)]] = OpConstantI [[i32type]], 32 +; CHECK-MIR-DAG: [[enc_unsigned:%[0-9]+\:iid\(s32\)]] = OpConstantI [[i32type]], 6 +; CHECK-MIR-DAG: [[unsigned_int:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_32]], [[enc_unsigned]], [[i32_0]] +; CHECK-MIR-DAG: 
[[unsigned_short:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_16]], [[enc_unsigned]], [[i32_0]] +; CHECK-MIR-DAG: [[enc_unsigned_char:%[0-9]+\:iid\(s32\)]] = OpConstantI [[i32type]], 7 +; CHECK-MIR-DAG: [[unsigned_char:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_8]], [[enc_unsigned_char]], [[i32_0]] +; CHECK-MIR-DAG: [[unsigned_long:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_64]], [[enc_unsigned]], [[i32_0]] +; CHECK-MIR-DAG: [[float:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_32]], [[enc_float]], [[i32_0]] +; CHECK-MIR-DAG: [[double:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_64]], [[enc_float]], [[i32_0]] +; CHECK-MIR-DAG: [[int:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 2, {{%[0-9]+\:[a-z0-9\(\)]+}}, [[i32_32]], [[enc_signed]], [[i32_0]] +; CHECK-MIR: OpExtInst [[void_type]], 3, 3, [[bool]], [[i32_8]], [[i32_0]] +; CHECK-MIR: OpExtInst [[void_type]], 3, 3, [[short]], [[i32_8]], [[i32_0]] +; CHECK-MIR: OpExtInst [[void_type]], 3, 3, [[char]], [[i32_8]], [[i32_0]] +; CHECK-MIR: OpExtInst [[void_type]], 3, 3, [[long]], [[i32_8]], [[i32_0]] +; CHECK-MIR: OpExtInst [[void_type]], 3, 3, [[unsigned_int]], [[i32_8]], [[i32_0]] +; CHECK-MIR: OpExtInst [[void_type]], 3, 3, [[unsigned_short]], [[i32_8]], [[i32_0]] +; CHECK-MIR: OpExtInst [[void_type]], 3, 3, [[unsigned_char]], [[i32_8]], [[i32_0]] +; CHECK-MIR: OpExtInst [[void_type]], 3, 3, [[unsigned_long]], [[i32_8]], [[i32_0]] +; CHECK-MIR: OpExtInst [[void_type]], 3, 3, [[float]], [[i32_8]], [[i32_0]] +; CHECK-MIR: OpExtInst [[void_type]], 3, 3, [[double]], [[i32_8]], [[i32_0]] +; CHECK-MIR: OpExtInst [[void_type]], 3, 3, [[int]], [[i32_5]], [[i32_0]] +; CHECK-MIR: [[debug_info_none:%[0-9]+\:id\(s32\)]] = OpExtInst [[void_type]], 3, 0 +; CHECK-MIR: OpExtInst [[void_type]], 3, 3, 
[[debug_info_none]], [[i32_5]], [[i32_0]] + +; CHECK-SPIRV: [[i32type:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-SPIRV-DAG: [[i32_8:%[0-9]+]] = OpConstant [[i32type]] 8 +; CHECK-SPIRV-DAG: [[i32_0:%[0-9]+]] = OpConstant [[i32type]] 0 +; CHECK-SPIRV-DAG: [[i32_5:%[0-9]+]] = OpConstant [[i32type]] 5 +; CHECK-SPIRV-DAG: [[enc_float:%[0-9]+]] = OpConstant [[i32type]] 3 +; CHECK-SPIRV-DAG: [[enc_boolean:%[0-9]+]] = OpConstant [[i32type]] 2 +; CHECK-SPIRV-DAG: [[i32_16:%[0-9]+]] = OpConstant [[i32type]] 16 +; CHECK-SPIRV-DAG: [[enc_signed:%[0-9]+]] = OpConstant [[i32type]] 4 +; CHECK-SPIRV-DAG: [[i32_64:%[0-9]+]] = OpConstant [[i32type]] 64 +; CHECK-SPIRV-DAG: [[i32_32:%[0-9]+]] = OpConstant [[i32type]] 32 +; CHECK-SPIRV-DAG: [[enc_unsigned:%[0-9]+]] = OpConstant [[i32type]] 6 +; CHECK-SPIRV-DAG: [[enc_unsigned_char:%[0-9]+]] = OpConstant [[i32type]] 7 +; CHECK-SPIRV-DAG: [[bool:%[0-9]+]] = OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypeBasic {{%[0-9]+}} [[i32_8]] [[enc_boolean]] [[i32_0]] +; CHECK-SPIRV-DAG: [[short:%[0-9]+]] = OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypeBasic {{%[0-9]+}} [[i32_16]] [[enc_signed]] [[i32_0]] +; CHECK-SPIRV-DAG: [[char:%[0-9]+]] = OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypeBasic {{%[0-9]+}} [[i32_8]] [[i32_5]] [[i32_0]] +; CHECK-SPIRV-DAG: [[long:%[0-9]+]] = OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypeBasic {{%[0-9]+}} [[i32_64]] [[enc_signed]] [[i32_0]] +; CHECK-SPIRV-DAG: [[unsigned_int:%[0-9]+]] = OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypeBasic {{%[0-9]+}} [[i32_32]] [[enc_unsigned]] [[i32_0]] +; CHECK-SPIRV-DAG: [[unsigned_short:%[0-9]+]] = OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypeBasic {{%[0-9]+}} [[i32_16]] [[enc_unsigned]] [[i32_0]] +; CHECK-SPIRV-DAG: [[unsigned_char:%[0-9]+]] = OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypeBasic {{%[0-9]+}} [[i32_8]] [[enc_unsigned_char]] [[i32_0]] +; CHECK-SPIRV-DAG: [[unsigned_long:%[0-9]+]] = OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypeBasic {{%[0-9]+}} [[i32_64]] [[enc_unsigned]] [[i32_0]] +; CHECK-SPIRV-DAG: [[float:%[0-9]+]] = 
OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypeBasic {{%[0-9]+}} [[i32_32]] [[enc_float]] [[i32_0]] +; CHECK-SPIRV-DAG: [[double:%[0-9]+]] = OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypeBasic {{%[0-9]+}} [[i32_64]] [[enc_float]] [[i32_0]] +; CHECK-SPIRV-DAG: [[int:%[0-9]+]] = OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypeBasic {{%[0-9]+}} [[i32_32]] [[enc_signed]] [[i32_0]] +; CHECK-SPIRV-DAG: OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypePointer [[bool]] [[i32_8]] [[i32_0]] +; CHECK-SPIRV-DAG: OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypePointer [[short]] [[i32_8]] [[i32_0]] +; CHECK-SPIRV-DAG: OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypePointer [[char]] [[i32_8]] [[i32_0]] +; CHECK-SPIRV-DAG: OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypePointer [[long]] [[i32_8]] [[i32_0]] +; CHECK-SPIRV-DAG: OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypePointer [[unsigned_int]] [[i32_8]] [[i32_0]] +; CHECK-SPIRV-DAG: OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypePointer [[unsigned_short]] [[i32_8]] [[i32_0]] +; CHECK-SPIRV-DAG: OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypePointer [[unsigned_char]] [[i32_8]] [[i32_0]] +; CHECK-SPIRV-DAG: OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypePointer [[unsigned_long]] [[i32_8]] [[i32_0]] +; CHECK-SPIRV-DAG: OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypePointer [[float]] [[i32_8]] [[i32_0]] +; CHECK-SPIRV-DAG: OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypePointer [[double]] [[i32_8]] [[i32_0]] +; CHECK-SPIRV-DAG: OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypePointer [[int]] [[i32_5]] [[i32_0]] +; CHECK-SPIRV-DAG: [[debug_info_none:%[0-9]+]] = OpExtInst {{%[0-9]+ %[0-9]+}} DebugInfoNone +; CHECK-SPIRV-DAG: OpExtInst {{%[0-9]+ %[0-9]+}} DebugTypePointer [[debug_info_none]] [[i32_5]] [[i32_0]] + +; CHECK-OPTION-NOT: DebugTypePointer + +@gi0 = dso_local addrspace(1) global ptr addrspace(4) null, align 4, !dbg !0 +@gv0 = dso_local addrspace(1) global ptr addrspace(4) null, align 4, !dbg !5 + +define spir_func i32 @test0() !dbg !17 { + %1 = alloca ptr addrspace(4), align 4 + %2 = alloca ptr addrspace(4), align 4 + %3 = alloca ptr 
addrspace(4), align 4 + %4 = alloca ptr addrspace(4), align 4 + %5 = alloca ptr addrspace(4), align 4 + %6 = alloca ptr addrspace(4), align 4 + %7 = alloca ptr addrspace(4), align 4 + %8 = alloca ptr addrspace(4), align 4 + %9 = alloca ptr addrspace(4), align 4 + %10 = alloca ptr addrspace(4), align 4 + %11 = alloca ptr addrspace(4), align 4 + %12 = alloca ptr addrspace(4), align 4 + %13 = alloca [8 x i32], align 4 + #dbg_declare(ptr %1, !21, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !24) + store ptr addrspace(4) null, ptr %1, align 4, !dbg !24 + #dbg_declare(ptr %2, !25, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !28) + store ptr addrspace(4) null, ptr %2, align 4, !dbg !28 + #dbg_declare(ptr %3, !29, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !32) + store ptr addrspace(4) null, ptr %3, align 4, !dbg !32 + #dbg_declare(ptr %4, !33, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !36) + store ptr addrspace(4) null, ptr %4, align 4, !dbg !36 + #dbg_declare(ptr %5, !37, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !40) + store ptr addrspace(4) null, ptr %5, align 4, !dbg !40 + #dbg_declare(ptr %6, !41, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !44) + store ptr addrspace(4) null, ptr %6, align 4, !dbg !44 + #dbg_declare(ptr %7, !45, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !48) + store ptr addrspace(4) null, ptr %7, align 4, !dbg !48 + #dbg_declare(ptr %8, !49, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !52) + store ptr addrspace(4) null, ptr %8, align 4, !dbg !52 + #dbg_declare(ptr %9, !53, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !56) + store ptr addrspace(4) null, ptr %9, align 4, !dbg !56 + #dbg_declare(ptr %10, !57, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !60) + store ptr addrspace(4) null, ptr %10, align 4, !dbg !60 + #dbg_declare(ptr %11, !61, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !62) + 
store ptr addrspace(4) null, ptr %11, align 4, !dbg !62 + #dbg_declare(ptr %12, !63, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !64) + %14 = load ptr addrspace(4), ptr %11, align 4, !dbg !65 + store ptr addrspace(4) %14, ptr %12, align 4, !dbg !64 + #dbg_declare(ptr %13, !66, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !70) + ret i32 0, !dbg !71 +} + +define spir_func i32 @test1() !dbg !72 { + %1 = alloca ptr addrspace(4), align 4 + %2 = alloca ptr addrspace(4), align 4 + %3 = alloca ptr addrspace(4), align 4 + %4 = alloca ptr addrspace(4), align 4 + %5 = alloca ptr addrspace(4), align 4 + %6 = alloca ptr addrspace(4), align 4 + %7 = alloca ptr addrspace(4), align 4 + %8 = alloca ptr addrspace(4), align 4 + %9 = alloca ptr addrspace(4), align 4 + %10 = alloca ptr addrspace(4), align 4 + %11 = alloca ptr addrspace(4), align 4 + %12 = alloca ptr addrspace(4), align 4 + %13 = alloca [8 x i32], align 4 + #dbg_declare(ptr %1, !73, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !74) + store ptr addrspace(4) null, ptr %1, align 4, !dbg !74 + #dbg_declare(ptr %2, !75, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !76) + store ptr addrspace(4) null, ptr %2, align 4, !dbg !76 + #dbg_declare(ptr %3, !77, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !78) + store ptr addrspace(4) null, ptr %3, align 4, !dbg !78 + #dbg_declare(ptr %4, !79, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !80) + store ptr addrspace(4) null, ptr %4, align 4, !dbg !80 + #dbg_declare(ptr %5, !81, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !82) + store ptr addrspace(4) null, ptr %5, align 4, !dbg !82 + #dbg_declare(ptr %6, !83, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !84) + store ptr addrspace(4) null, ptr %6, align 4, !dbg !84 + #dbg_declare(ptr %7, !85, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !86) + store ptr addrspace(4) null, ptr %7, align 4, !dbg !86 + #dbg_declare(ptr %8, 
!87, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !88) + store ptr addrspace(4) null, ptr %8, align 4, !dbg !88 + #dbg_declare(ptr %9, !89, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !90) + store ptr addrspace(4) null, ptr %9, align 4, !dbg !90 + #dbg_declare(ptr %10, !91, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !92) + store ptr addrspace(4) null, ptr %10, align 4, !dbg !92 + #dbg_declare(ptr %11, !93, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !94) + store ptr addrspace(4) null, ptr %11, align 4, !dbg !94 + #dbg_declare(ptr %12, !95, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !96) + %14 = load ptr addrspace(4), ptr %11, align 4, !dbg !97 + store ptr addrspace(4) %14, ptr %12, align 4, !dbg !96 + #dbg_declare(ptr %13, !98, !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef), !99) + ret i32 0, !dbg !100 +} + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!10, !11, !12, !13} +!opencl.ocl.version = !{!14} +!opencl.cxx.version = !{!15} +!opencl.spir.version = !{!14} +!llvm.ident = !{!16} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) +!1 = distinct !DIGlobalVariable(name: "gi0", scope: !2, file: !3, line: 1, type: !8, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version XX.X.XXXX (FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "example.cpp", directory: "/AAAAAAAAAA/BBBBBBBB/CCCCCCCCC", checksumkind: CSK_MD5, checksum: "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF") +!4 = !{!0, !5} +!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) +!6 = distinct !DIGlobalVariable(name: "gv0", scope: !2, file: !3, line: 3, type: !7, 
isLocal: false, isDefinition: true) +!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 32, dwarfAddressSpace: 1) +!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 32, dwarfAddressSpace: 1) +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !{i32 7, !"Dwarf Version", i32 5} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i32 1, !"wchar_size", i32 4} +!13 = !{i32 7, !"frame-pointer", i32 2} +!14 = !{i32 2, i32 0} +!15 = !{i32 1, i32 0} +!16 = !{!"clang version 20.0.0git (https://github.com/bwlodarcz/llvm-project de1f5b96adcea52bf7c9670c46123fe1197050d2)"} +!17 = distinct !DISubprogram(name: "test0", linkageName: "test0", scope: !3, file: !3, line: 5, type: !18, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !20) +!18 = !DISubroutineType(cc: DW_CC_LLVM_SpirFunction, types: !19) +!19 = !{!9} +!20 = !{} +!21 = !DILocalVariable(name: "bp0", scope: !17, file: !3, line: 6, type: !22) +!22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !23, size: 32, dwarfAddressSpace: 4) +!23 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean) +!24 = !DILocation(line: 6, column: 9, scope: !17) +!25 = !DILocalVariable(name: "sp0", scope: !17, file: !3, line: 7, type: !26) +!26 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !27, size: 32, dwarfAddressSpace: 4) +!27 = !DIBasicType(name: "short", size: 16, encoding: DW_ATE_signed) +!28 = !DILocation(line: 7, column: 10, scope: !17) +!29 = !DILocalVariable(name: "cp0", scope: !17, file: !3, line: 8, type: !30) +!30 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !31, size: 32, dwarfAddressSpace: 4) +!31 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!32 = !DILocation(line: 8, column: 9, scope: !17) +!33 = !DILocalVariable(name: "lp0", scope: !17, file: !3, line: 9, type: !34) +!34 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !35, size: 32, dwarfAddressSpace: 4) +!35 = 
!DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed) +!36 = !DILocation(line: 9, column: 9, scope: !17) +!37 = !DILocalVariable(name: "uip0", scope: !17, file: !3, line: 10, type: !38) +!38 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !39, size: 32, dwarfAddressSpace: 4) +!39 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!40 = !DILocation(line: 10, column: 17, scope: !17) +!41 = !DILocalVariable(name: "usp0", scope: !17, file: !3, line: 11, type: !42) +!42 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !43, size: 32, dwarfAddressSpace: 4) +!43 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) +!44 = !DILocation(line: 11, column: 19, scope: !17) +!45 = !DILocalVariable(name: "ucp0", scope: !17, file: !3, line: 12, type: !46) +!46 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !47, size: 32, dwarfAddressSpace: 4) +!47 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) +!48 = !DILocation(line: 12, column: 18, scope: !17) +!49 = !DILocalVariable(name: "ulp0", scope: !17, file: !3, line: 13, type: !50) +!50 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !51, size: 32, dwarfAddressSpace: 4) +!51 = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned) +!52 = !DILocation(line: 13, column: 18, scope: !17) +!53 = !DILocalVariable(name: "fp0", scope: !17, file: !3, line: 14, type: !54) +!54 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !55, size: 32, dwarfAddressSpace: 4) +!55 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) +!56 = !DILocation(line: 14, column: 10, scope: !17) +!57 = !DILocalVariable(name: "dp0", scope: !17, file: !3, line: 15, type: !58) +!58 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !59, size: 32, dwarfAddressSpace: 4) +!59 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) +!60 = !DILocation(line: 15, column: 11, scope: !17) +!61 = !DILocalVariable(name: "ip0", scope: !17, 
file: !3, line: 16, type: !8) +!62 = !DILocation(line: 16, column: 8, scope: !17) +!63 = !DILocalVariable(name: "addr0", scope: !17, file: !3, line: 17, type: !7) +!64 = !DILocation(line: 17, column: 9, scope: !17) +!65 = !DILocation(line: 17, column: 17, scope: !17) +!66 = !DILocalVariable(name: "arr0", scope: !17, file: !3, line: 18, type: !67) +!67 = !DICompositeType(tag: DW_TAG_array_type, baseType: !9, size: 256, elements: !68) +!68 = !{!69} +!69 = !DISubrange(count: 8) +!70 = !DILocation(line: 18, column: 7, scope: !17) +!71 = !DILocation(line: 19, column: 3, scope: !17) +!72 = distinct !DISubprogram(name: "test1", linkageName: "test1", scope: !3, file: !3, line: 22, type: !18, scopeLine: 22, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !20) +!73 = !DILocalVariable(name: "bp1", scope: !72, file: !3, line: 23, type: !22) +!74 = !DILocation(line: 23, column: 9, scope: !72) +!75 = !DILocalVariable(name: "sp1", scope: !72, file: !3, line: 24, type: !26) +!76 = !DILocation(line: 24, column: 10, scope: !72) +!77 = !DILocalVariable(name: "cp1", scope: !72, file: !3, line: 25, type: !30) +!78 = !DILocation(line: 25, column: 9, scope: !72) +!79 = !DILocalVariable(name: "lp1", scope: !72, file: !3, line: 26, type: !34) +!80 = !DILocation(line: 26, column: 9, scope: !72) +!81 = !DILocalVariable(name: "uip1", scope: !72, file: !3, line: 27, type: !38) +!82 = !DILocation(line: 27, column: 17, scope: !72) +!83 = !DILocalVariable(name: "usp1", scope: !72, file: !3, line: 28, type: !42) +!84 = !DILocation(line: 28, column: 19, scope: !72) +!85 = !DILocalVariable(name: "ucp1", scope: !72, file: !3, line: 29, type: !46) +!86 = !DILocation(line: 29, column: 18, scope: !72) +!87 = !DILocalVariable(name: "ulp1", scope: !72, file: !3, line: 30, type: !50) +!88 = !DILocation(line: 30, column: 18, scope: !72) +!89 = !DILocalVariable(name: "fp1", scope: !72, file: !3, line: 31, type: !54) +!90 = !DILocation(line: 31, column: 10, scope: !72) +!91 = 
!DILocalVariable(name: "dp1", scope: !72, file: !3, line: 32, type: !58) +!92 = !DILocation(line: 32, column: 11, scope: !72) +!93 = !DILocalVariable(name: "ip1", scope: !72, file: !3, line: 33, type: !8) +!94 = !DILocation(line: 33, column: 8, scope: !72) +!95 = !DILocalVariable(name: "addr1", scope: !72, file: !3, line: 34, type: !7) +!96 = !DILocation(line: 34, column: 9, scope: !72) +!97 = !DILocation(line: 34, column: 17, scope: !72) +!98 = !DILocalVariable(name: "arr1", scope: !72, file: !3, line: 35, type: !67) +!99 = !DILocation(line: 35, column: 7, scope: !72) +!100 = !DILocation(line: 36, column: 3, scope: !72) diff --git a/llvm/test/MC/RISCV/insn-invalid.s b/llvm/test/MC/RISCV/insn-invalid.s index d6fabea4e170164..ef2f3c382972abc 100644 --- a/llvm/test/MC/RISCV/insn-invalid.s +++ b/llvm/test/MC/RISCV/insn-invalid.s @@ -26,8 +26,30 @@ .insn . # CHECK: :[[@LINE]]:7: error: expected instruction format or an integer constant .insn 0x2, # CHECK: :[[@LINE]]:12: error: expected an integer constant -.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction length mismatch -.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction length mismatch -.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: invalid operand for instruction -.insn 0x0010 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed + .insn 0x4, 0x13, 0 # CHECK: :[[@LINE]]:16: error: invalid operand for instruction + +.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding +.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding +.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction + +.insn 0x0, 0x0 # CHECK: :[[@LINE]]:7: error: instruction lengths must be a non-zero multiple of two +.insn 0x1, 0xff # CHECK: :[[@LINE]]:7: error: instruction lengths must be a non-zero multiple of two +.insn 10, 0x000007f # CHECK: :[[@LINE]]:7: error: instruction lengths over 
64 bits are not supported + +.insn 0x2, 0x03 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding +.insn 0x2, 0x1f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding +.insn 0x2, 0x3f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding + +.insn 0x4, 0x00000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding + +.insn 0x6, 0x000000000001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed +.insn 0x8, 0x0000000000000001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed + +.insn 0x2, 0x10001 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction +.insn 0x4, 0x100000003 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction +.insn 0x6, 0x100000000001f # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction +.insn 0x8, 0x1000000000000003f # CHECK: :[[@LINE]]:12: error: expected an integer constant + +.insn 0x0010 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed +.insn 0x2, 0x0001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed diff --git a/llvm/test/MC/RISCV/insn.s b/llvm/test/MC/RISCV/insn.s index b95c3b87b442f29..e32fec25bb16b43 100644 --- a/llvm/test/MC/RISCV/insn.s +++ b/llvm/test/MC/RISCV/insn.s @@ -164,3 +164,23 @@ target: # CHECK-ASM: encoding: [0x13,0x00,0x00,0x00] # CHECK-OBJ: addi zero, zero, 0x0 .insn 0x4, 0x13 + +# CHECK-ASM: .insn 0x6, 31 +# CHECK-ASM: encoding: [0x1f,0x00,0x00,0x00,0x00,0x00] +# CHECK-OBJ: +.insn 6, 0x1f + +# CHECK-ASM: .insn 0x4, 65503 +# CHECK-ASM: encoding: [0xdf,0xff,0x00,0x00] +# CHECK-OBJ: +.insn 0xffdf + +# CHECK-ASM: .insn 0x8, 63 +# CHECK-ASM: encoding: [0x3f,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +# CHECK-OBJ: +.insn 8, 0x3f + +# CHECK-ASM: .insn 0x4, 65471 +# CHECK-ASM: encoding: [0xbf,0xff,0x00,0x00] +# CHECK-OBJ: +.insn 0xffbf diff --git a/llvm/test/MC/RISCV/insn_c-invalid.s 
b/llvm/test/MC/RISCV/insn_c-invalid.s index 3b424b2a9fd3294..9af864edc31e249 100644 --- a/llvm/test/MC/RISCV/insn_c-invalid.s +++ b/llvm/test/MC/RISCV/insn_c-invalid.s @@ -24,4 +24,4 @@ ## Make fake mnemonics we use to match these in the tablegened asm match table isn't exposed. .insn_cr 2, 9, a0, a1 # CHECK: :[[#@LINE]]:1: error: unknown directive -.insn 0xfffffff0 # CHECK: :[[@LINE]]:7: error: invalid operand for instruction +.insn 0xfffffff0 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction diff --git a/llvm/test/MC/WebAssembly/annotations.s b/llvm/test/MC/WebAssembly/annotations.s index 59a1349f6eaf20d..f761ef3f06b1fc1 100644 --- a/llvm/test/MC/WebAssembly/annotations.s +++ b/llvm/test/MC/WebAssembly/annotations.s @@ -7,8 +7,9 @@ .section .text.test_annotation,"",@ .type test_annotation,@function test_annotation: - .functype test_annotation () -> () + .functype test_annotation (exnref) -> () .tagtype __cpp_exception i32 + .tagtype __c_longjmp i32 try br 0 catch __cpp_exception @@ -53,8 +54,18 @@ test_annotation: return end_block drop - end_function + i32.const 0 + loop (i32) -> () + local.get 0 + loop (exnref) -> () + try_table (catch __cpp_exception 1) (catch_all_ref 0) + end_try_table + drop + end_loop + drop + end_loop + end_function # CHECK: test_annotation: # CHECK: try @@ -104,5 +115,16 @@ test_annotation: # CHECK-NEXT: return # CHECK-NEXT: end_block # label7: # CHECK-NEXT: drop -# CHECK-NEXT: end_function +# CHECK: i32.const 0 +# CHECK-NEXT: loop (i32) -> () # label12: +# CHECK-NEXT: local.get 0 +# CHECK-NEXT: loop (exnref) -> () # label13: +# CHECK-NEXT: try_table (catch __cpp_exception 1) (catch_all_ref 0) # 1: up to label12 +# CHECK-NEXT: # 0: up to label13 +# CHECK-NEXT: end_try_table # label14: +# CHECK-NEXT: drop +# CHECK-NEXT: end_loop +# CHECK-NEXT: drop +# CHECK-NEXT: end_loop +# CHECK-NEXT: end_function diff --git a/llvm/test/MC/WebAssembly/eh-assembly-legacy.s b/llvm/test/MC/WebAssembly/eh-assembly-legacy.s deleted 
file mode 100644 index deba6cc683f035d..000000000000000 --- a/llvm/test/MC/WebAssembly/eh-assembly-legacy.s +++ /dev/null @@ -1,99 +0,0 @@ -# RUN: llvm-mc -triple=wasm32-unknown-unknown -mattr=+exception-handling < %s | FileCheck %s -# Check that it converts to .o without errors, but don't check any output: -# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -mattr=+exception-handling -o %t.o < %s - - .tagtype __cpp_exception i32 - .tagtype __c_longjmp i32 - .functype foo () -> () - -eh_legacy_test: - .functype eh_legacy_test () -> () - - # try-catch with catch, catch_all, throw, and rethrow - try - i32.const 3 - throw __cpp_exception - catch __cpp_exception - drop - rethrow 0 - catch __c_longjmp - drop - catch_all - rethrow 0 - end_try - - # Nested try-catch with a rethrow - try - call foo - catch_all - try - catch_all - rethrow 1 - end_try - end_try - - # try-catch with a single return value - try i32 - i32.const 0 - catch __cpp_exception - end_try - drop - - # try-catch with a mulvivalue return - try () -> (i32, f32) - i32.const 0 - f32.const 0.0 - catch __cpp_exception - f32.const 1.0 - end_try - drop - drop - - # Catch-less try - try - call foo - end_try - end_function - -# CHECK-LABEL: eh_legacy_test: -# CHECK: try -# CHECK-NEXT: i32.const 3 -# CHECK-NEXT: throw __cpp_exception -# CHECK-NEXT: catch __cpp_exception -# CHECK-NEXT: drop -# CHECK-NEXT: rethrow 0 -# CHECK-NEXT: catch __c_longjmp -# CHECK-NEXT: drop -# CHECK-NEXT: catch_all -# CHECK-NEXT: rethrow 0 -# CHECK-NEXT: end_try - -# CHECK: try -# CHECK-NEXT: call foo -# CHECK-NEXT: catch_all -# CHECK-NEXT: try -# CHECK-NEXT: catch_all -# CHECK-NEXT: rethrow 1 -# CHECK-NEXT: end_try -# CHECK-NEXT: end_try - -# CHECK: try i32 -# CHECK-NEXT: i32.const 0 -# CHECK-NEXT: catch __cpp_exception -# CHECK-NEXT: end_try -# CHECK-NEXT: drop - -# CHECK: try () -> (i32, f32) -# CHECK-NEXT: i32.const 0 -# CHECK-NEXT: f32.const 0x0p0 -# CHECK-NEXT: catch __cpp_exception -# CHECK-NEXT: f32.const 0x1p0 -# 
CHECK-NEXT: end_try -# CHECK-NEXT: drop -# CHECK-NEXT: drop - -# CHECK: try -# CHECK-NEXT: call foo -# CHECK-NEXT: end_try -# CHECK-NEXT: end_function - diff --git a/llvm/test/MC/WebAssembly/eh-assembly.s b/llvm/test/MC/WebAssembly/eh-assembly.s index b4d6b324d96e3ea..31dfce5a3cde31b 100644 --- a/llvm/test/MC/WebAssembly/eh-assembly.s +++ b/llvm/test/MC/WebAssembly/eh-assembly.s @@ -1,13 +1,13 @@ -# RUN: llvm-mc -triple=wasm32-unknown-unknown -mattr=+exception-handling --no-type-check < %s | FileCheck %s +# RUN: llvm-mc -triple=wasm32-unknown-unknown -mattr=+exception-handling < %s | FileCheck %s # Check that it converts to .o without errors, but don't check any output: -# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -mattr=+exception-handling --no-type-check -o %t.o < %s +# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -mattr=+exception-handling -o %t.o < %s .tagtype __cpp_exception i32 .tagtype __c_longjmp i32 .functype foo () -> () eh_test: - .functype eh_test () -> () + .functype eh_test (exnref) -> () # try_table with all four kinds of catch clauses block exnref @@ -24,7 +24,6 @@ eh_test: return end_block throw_ref - drop end_block return end_block @@ -84,8 +83,69 @@ eh_test: drop drop + # try_table targeting loops + i32.const 0 + loop (i32) -> () + local.get 0 + loop (exnref) -> () + try_table (catch __cpp_exception 1) (catch_all_ref 0) + end_try_table + drop + end_loop + drop + end_loop + end_function + +eh_legacy_test: + .functype eh_legacy_test () -> () + + # try-catch with catch, catch_all, throw, and rethrow + try + i32.const 3 + throw __cpp_exception + catch __cpp_exception + drop + rethrow 0 + catch __c_longjmp + drop + catch_all + rethrow 0 + end_try + + # Nested try-catch with a rethrow + try + call foo + catch_all + try + catch_all + rethrow 1 + end_try + end_try + + # try-catch with a single return value + try i32 + i32.const 0 + catch __cpp_exception + end_try + drop + + # try-catch with a mulvivalue return + try () -> 
(i32, f32) + i32.const 0 + f32.const 0.0 + catch __cpp_exception + f32.const 1.0 + end_try + drop + drop + + # Catch-less try + try + call foo + end_try end_function + # CHECK-LABEL: eh_test: # CHECK: block exnref # CHECK-NEXT: block @@ -101,7 +161,6 @@ eh_test: # CHECK-NEXT: return # CHECK-NEXT: end_block # CHECK-NEXT: throw_ref -# CHECK-NEXT: drop # CHECK-NEXT: end_block # CHECK-NEXT: return # CHECK-NEXT: end_block @@ -155,3 +214,55 @@ eh_test: # CHECK-NEXT: end_try_table # CHECK-NEXT: drop # CHECK-NEXT: drop + +# CHECK: i32.const 0 +# CHECK-NEXT: loop (i32) -> () +# CHECK-NEXT: local.get 0 +# CHECK-NEXT: loop (exnref) -> () +# CHECK-NEXT: try_table (catch __cpp_exception 1) (catch_all_ref 0) +# CHECK: end_try_table +# CHECK-NEXT: drop +# CHECK-NEXT: end_loop +# CHECK-NEXT: drop +# CHECK-NEXT: end_loop + +# CHECK: eh_legacy_test: +# CHECK: try +# CHECK-NEXT: i32.const 3 +# CHECK-NEXT: throw __cpp_exception +# CHECK-NEXT: catch __cpp_exception +# CHECK-NEXT: drop +# CHECK-NEXT: rethrow 0 +# CHECK-NEXT: catch __c_longjmp +# CHECK-NEXT: drop +# CHECK-NEXT: catch_all +# CHECK-NEXT: rethrow 0 +# CHECK-NEXT: end_try + +# CHECK: try +# CHECK-NEXT: call foo +# CHECK-NEXT: catch_all +# CHECK-NEXT: try +# CHECK-NEXT: catch_all +# CHECK-NEXT: rethrow 1 +# CHECK-NEXT: end_try +# CHECK-NEXT: end_try + +# CHECK: try i32 +# CHECK-NEXT: i32.const 0 +# CHECK-NEXT: catch __cpp_exception +# CHECK-NEXT: end_try +# CHECK-NEXT: drop + +# CHECK: try () -> (i32, f32) +# CHECK-NEXT: i32.const 0 +# CHECK-NEXT: f32.const 0x0p0 +# CHECK-NEXT: catch __cpp_exception +# CHECK-NEXT: f32.const 0x1p0 +# CHECK-NEXT: end_try +# CHECK-NEXT: drop +# CHECK-NEXT: drop + +# CHECK: try +# CHECK-NEXT: call foo +# CHECK-NEXT: end_try diff --git a/llvm/test/MC/WebAssembly/type-checker-errors.s b/llvm/test/MC/WebAssembly/type-checker-errors.s index c5187d10fdca071..9aa652348c538e1 100644 --- a/llvm/test/MC/WebAssembly/type-checker-errors.s +++ b/llvm/test/MC/WebAssembly/type-checker-errors.s @@ -944,3 
+944,43 @@ block_param_and_return: # CHECK: :[[@LINE+1]]:3: error: type mismatch, expected [] but got [f32] end_function + + .tagtype __cpp_exception i32 + +eh_test: + .functype eh_test () -> () + block i32 + block i32 + block i32 + block +# CHECK: :[[@LINE+4]]:11: error: try_table: catch index 0: type mismatch, catch tag type is [i32], but destination's type is [] +# CHECK: :[[@LINE+3]]:11: error: try_table: catch index 1: type mismatch, catch tag type is [i32, exnref], but destination's type is [i32] +# CHECK: :[[@LINE+2]]:11: error: try_table: catch index 2: type mismatch, catch tag type is [], but destination's type is [i32] +# CHECK: :[[@LINE+1]]:11: error: try_table: catch index 3: type mismatch, catch tag type is [exnref], but destination's type is [i32] + try_table i32 (catch __cpp_exception 0) (catch_ref __cpp_exception 1) (catch_all 2) (catch_all_ref 3) +# CHECK: :[[@LINE+1]]:11: error: type mismatch, expected [i32] but got [] + end_try_table + drop + end_block + end_block + end_block + end_block + drop + + loop + i32.const 0 + loop (i32) -> () + loop (i32) -> () + loop +# CHECK: :[[@LINE+4]]:11: error: try_table: catch index 0: type mismatch, catch tag type is [i32], but destination's type is [] +# CHECK: :[[@LINE+3]]:11: error: try_table: catch index 1: type mismatch, catch tag type is [i32, exnref], but destination's type is [i32] +# CHECK: :[[@LINE+2]]:11: error: try_table: catch index 2: type mismatch, catch tag type is [], but destination's type is [i32] +# CHECK: :[[@LINE+1]]:11: error: try_table: catch index 3: type mismatch, catch tag type is [exnref], but destination's type is [] + try_table (catch __cpp_exception 0) (catch_ref __cpp_exception 1) (catch_all 2) (catch_all_ref 3) + end_try_table + end_loop + drop + end_loop + end_loop + end_loop + end_function diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td index bafc19a2b15de30..92baab91c620ebe 100644 --- 
a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td @@ -136,14 +136,18 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { // CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2([[#LOWER:]]), GIMT_Encode2([[#UPPER:]]), /*)*//*default:*//*Label 6*/ GIMT_Encode4([[#DEFAULT:]]), -// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(482), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(518), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(565), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(599), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(622), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(634), +// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4([[#%u, mul(UPPER-LOWER, 4) + 10]]), +// CHECK-COUNT-40: GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4([[L1:[0-9]+]]), +// CHECK-COUNT-34: GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4([[L2:[0-9]+]]), +// CHECK-COUNT-30: GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4([[L3:[0-9]+]]), +// CHECK-COUNT-4: GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4([[L4:[0-9]+]]), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4([[L5:[0-9]+]]), // CHECK-NEXT: // Label 0: @[[#%u, mul(UPPER-LOWER, 4) + 10]] -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(506), // Rule ID 4 // +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4([[L7:[0-9]+]]), // Rule ID 4 // // CHECK-NEXT: GIM_CheckFeatures, GIMT_Encode2(GIFBS_HasAnswerToEverything), // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule3Enabled), // CHECK-NEXT: // MIs[0] a @@ -156,8 +160,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIM_CheckIsSafeToFold, /*NumInsns*/1, // 
CHECK-NEXT: // Combiner Rule #3: InstTest1 // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner2), -// CHECK-NEXT: // Label 7: @506 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(517), // Rule ID 3 // +// CHECK-NEXT: // Label 7: @[[L7]] +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4([[L8:[0-9]+]]), // Rule ID 3 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled), // CHECK-NEXT: // MIs[0] a // CHECK-NEXT: // No operand predicates @@ -165,10 +169,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: // No operand predicates // CHECK-NEXT: // Combiner Rule #2: InstTest0 // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner1), -// CHECK-NEXT: // Label 8: @517 +// CHECK-NEXT: // Label 8: @[[L8]] // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 1: @518 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(564), // Rule ID 6 // +// CHECK-NEXT: // Label 1: @[[L1]] +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4([[L9:[0-9]+]]), // Rule ID 6 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule5Enabled), // CHECK-NEXT: GIM_RootCheckType, /*Op*/2, /*Type*/GILLT_s32, // CHECK-NEXT: // MIs[0] dst @@ -185,10 +189,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/1, /*OpIdx*/1, // z // CHECK-NEXT: GIR_EraseRootFromParent_Done, -// CHECK-NEXT: // Label 9: @564 +// CHECK-NEXT: // Label 9: @[[L9]] // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 2: @565 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(598), // Rule ID 5 // +// CHECK-NEXT: // Label 2: @[[L2]] +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4([[L10:[0-9]+]]), // Rule ID 5 // // CHECK-NEXT: GIM_CheckSimplePredicate, 
GIMT_Encode2(GICXXPred_Simple_IsRule4Enabled), // CHECK-NEXT: // MIs[0] tmp // CHECK-NEXT: GIM_RecordInsnIgnoreCopies, /*DefineMI*/1, /*MI*/0, /*OpIdx*/0, // MIs[1] @@ -204,29 +208,29 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/1, // ptr // CHECK-NEXT: GIR_MergeMemOperands, /*InsnID*/0, /*NumInsns*/2, /*MergeInsnID's*/0, 1, // CHECK-NEXT: GIR_EraseRootFromParent_Done, -// CHECK-NEXT: // Label 10: @598 +// CHECK-NEXT: // Label 10: @[[L10]] // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 3: @599 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(610), // Rule ID 0 // +// CHECK-NEXT: // Label 3: @[[L3]] +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4([[L11:[0-9]+]]), // Rule ID 0 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled), // CHECK-NEXT: // Combiner Rule #0: WipOpcodeTest0; wip_match_opcode 'G_TRUNC' // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0), -// CHECK-NEXT: // Label 11: @610 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(621), // Rule ID 1 // +// CHECK-NEXT: // Label 11: @[[L11]] +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4([[L12:[0-9]+]]), // Rule ID 1 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), // CHECK-NEXT: // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_TRUNC' // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0), -// CHECK-NEXT: // Label 12: @621 +// CHECK-NEXT: // Label 12: @[[L12]] // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 4: @622 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(633), // Rule ID 2 // +// CHECK-NEXT: // Label 4: @[[L4]] +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4([[L13:[0-9]+]]), // Rule ID 2 // // CHECK-NEXT: GIM_CheckSimplePredicate, 
GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), // CHECK-NEXT: // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_SEXT' // CHECK-NEXT: GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0), -// CHECK-NEXT: // Label 13: @633 +// CHECK-NEXT: // Label 13: @[[L13]] // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 5: @634 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4(668), // Rule ID 7 // +// CHECK-NEXT: // Label 5: @[[L5]] +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4([[L14:[0-9]+]]), // Rule ID 7 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule6Enabled), // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand predicates @@ -240,7 +244,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_RootToRootCopy, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_AddSimpleTempRegister, /*InsnID*/0, /*TempRegID*/0, // CHECK-NEXT: GIR_EraseRootFromParent_Done, -// CHECK-NEXT: // Label 14: @668 +// CHECK-NEXT: // Label 14: @[[L14]] // CHECK-NEXT: GIM_Reject, // CHECK-NEXT: // Label 6: @[[#%u, DEFAULT]] // CHECK-NEXT: GIM_Reject, diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll index b9a96937e57c778..247a02f0bcc14a5 100644 --- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll +++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll @@ -173,6 +173,19 @@ define i32 @test_load_cast_combine_noundef(ptr %ptr) { ret i32 %c } +define i32 @test_load_cast_combine_noalias_addrspace(ptr %ptr) { +; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA. 
+; CHECK-LABEL: @test_load_cast_combine_noalias_addrspace( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: ret i32 [[L1]] +; +entry: + %l = load float, ptr %ptr, align 4, !noalias.addrspace !11 + %c = bitcast float %l to i32 + ret i32 %c +} + !0 = !{!1, !1, i64 0} !1 = !{!"scalar type", !2} !2 = !{!"root"} @@ -184,3 +197,4 @@ define i32 @test_load_cast_combine_noundef(ptr %ptr) { !8 = !{i32 1} !9 = !{i64 8} !10 = distinct !{} +!11 = !{i32 5, i32 6} diff --git a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll index 68ff0859beb2a73..2e75a0d2c98abf5 100644 --- a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll +++ b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instsimplify -S | FileCheck %s +; RUN: opt < %s -passes=instsimplify -S | FileCheck %s --check-prefixes=CHECK,CONSTVEC +; RUN: opt < %s -passes=instsimplify -use-constant-fp-for-fixed-length-splat -use-constant-int-for-fixed-length-splat -S | FileCheck %s --check-prefixes=CHECK,CONSTSPLAT target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-f64:32:64-v64:64:64-v128:128:128" define <2 x i64> @test1() { @@ -67,17 +68,24 @@ define <4 x i32> @test8(<1 x i64> %y) { } define <4 x i32> @test9(<1 x i64> %y) { -; CHECK-LABEL: @test9( -; CHECK-NEXT: ret <4 x i32> +; CONSTVEC-LABEL: @test9( +; CONSTVEC-NEXT: ret <4 x i32> +; +; CONSTSPLAT-LABEL: @test9( +; CONSTSPLAT-NEXT: ret <4 x i32> splat (i32 -1) ; %c = bitcast <2 x i64> to <4 x i32> ret <4 x i32> %c } define <1 x i1> @test10() { -; CHECK-LABEL: @test10( -; CHECK-NEXT: [[RET:%.*]] = icmp eq <1 x i64> to i64)>, zeroinitializer -; CHECK-NEXT: ret <1 x i1> [[RET]] +; CONSTVEC-LABEL: @test10( +; CONSTVEC-NEXT: [[RET:%.*]] = icmp eq <1 x i64> to i64)>, zeroinitializer +; CONSTVEC-NEXT: ret <1 x i1> [[RET]] +; +; 
CONSTSPLAT-LABEL: @test10( +; CONSTSPLAT-NEXT: [[RET:%.*]] = icmp eq <1 x i64> splat (i64 -1), zeroinitializer +; CONSTSPLAT-NEXT: ret <1 x i1> [[RET]] ; %ret = icmp eq <1 x i64> to i64)>, zeroinitializer ret <1 x i1> %ret @@ -85,8 +93,11 @@ define <1 x i1> @test10() { ; from MultiSource/Benchmarks/Bullet define <2 x float> @foo() { -; CHECK-LABEL: @foo( -; CHECK-NEXT: ret <2 x float> +; CONSTVEC-LABEL: @foo( +; CONSTVEC-NEXT: ret <2 x float> +; +; CONSTSPLAT-LABEL: @foo( +; CONSTSPLAT-NEXT: ret <2 x float> splat (float 0xFFFFFFFFE0000000) ; %cast = bitcast i64 -1 to <2 x float> ret <2 x float> %cast @@ -94,16 +105,22 @@ define <2 x float> @foo() { define <2 x double> @foo2() { -; CHECK-LABEL: @foo2( -; CHECK-NEXT: ret <2 x double> +; CONSTVEC-LABEL: @foo2( +; CONSTVEC-NEXT: ret <2 x double> +; +; CONSTSPLAT-LABEL: @foo2( +; CONSTSPLAT-NEXT: ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF) ; %cast = bitcast i128 -1 to <2 x double> ret <2 x double> %cast } define <1 x float> @foo3() { -; CHECK-LABEL: @foo3( -; CHECK-NEXT: ret <1 x float> +; CONSTVEC-LABEL: @foo3( +; CONSTVEC-NEXT: ret <1 x float> +; +; CONSTSPLAT-LABEL: @foo3( +; CONSTSPLAT-NEXT: ret <1 x float> splat (float 0xFFFFFFFFE0000000) ; %cast = bitcast i32 -1 to <1 x float> ret <1 x float> %cast @@ -126,8 +143,11 @@ define double @foo5() { } define <2 x double> @foo6() { -; CHECK-LABEL: @foo6( -; CHECK-NEXT: ret <2 x double> +; CONSTVEC-LABEL: @foo6( +; CONSTVEC-NEXT: ret <2 x double> +; +; CONSTSPLAT-LABEL: @foo6( +; CONSTSPLAT-NEXT: ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF) ; %cast = bitcast <4 x i32> to <2 x double> ret <2 x double> %cast @@ -276,3 +296,14 @@ define <16 x i8> @bitcast_constexpr_16i8_8i16_u256uuu256uu() { %cast = bitcast <8 x i16> to <16 x i8> ret <16 x i8> %cast } + +define <1 x i32> @bitcast_constexpr_scalar_fp_to_vector_int() { +; CONSTVEC-LABEL: @bitcast_constexpr_scalar_fp_to_vector_int( +; CONSTVEC-NEXT: ret <1 x i32> +; +; CONSTSPLAT-LABEL: 
@bitcast_constexpr_scalar_fp_to_vector_int( +; CONSTSPLAT-NEXT: ret <1 x i32> bitcast (<1 x float> splat (float 1.000000e+00) to <1 x i32>) +; + %res = bitcast float 1.0 to <1 x i32> + ret <1 x i32> %res +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll index 014620487c202ce..79d7ab84b3a0f01 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll @@ -29,8 +29,7 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll index 997ef7466d5cfca..76d0887d1c53cc8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll @@ -14,12 +14,11 @@ define void @interleaved_store_first_order_recurrence(ptr noalias %src, ptr %dst ; CHECK-NEXT: [[TMP2:%.*]] = 
shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> [[TMP2]], <8 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> [[TMP10]], <12 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i32> [[TMP11]], <12 x i32> poison, <12 x i32> -; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 +; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP12]], label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index 11fe8a268477936..93ca7385d9ea66a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -525,8 +525,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-UNORDERED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]] -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 -; CHECK-UNORDERED-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP13]], align 4 +; CHECK-UNORDERED-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP12]], align 4 ; CHECK-UNORDERED-NEXT: 
[[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8f32( [[WIDE_VEC]]) ; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-UNORDERED-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 @@ -595,8 +594,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]] -; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0 -; CHECK-ORDERED-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP11]], align 4 +; CHECK-ORDERED-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP10]], align 4 ; CHECK-ORDERED-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8f32( [[WIDE_VEC]]) ; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 @@ -669,9 +667,8 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]] -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[INTERLEAVED_MASK:%.*]] = call @llvm.vector.interleave2.nxv8i1( [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK]]) -; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP15]], i32 4, [[INTERLEAVED_MASK]], poison) +; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP14]], i32 4, [[INTERLEAVED_MASK]], poison) ; CHECK-ORDERED-TF-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8f32( [[WIDE_MASKED_VEC]]) 
; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index d6c643df955a7b5..c0933a9445f94da 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -148,8 +148,7 @@ define void @test_stride2_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> , [[STRIDED_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP2]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index 5005506e3833460..0e55ad65cdb2c23 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -431,12 +431,10 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] 
= load <32 x i8>, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32> ; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <32 x i8>, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <32 x i8>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32> ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = zext <8 x i8> [[STRIDED_VEC4]] to <8 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index f70e9d6c9416ac6..2789ab484e1b665 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -23,15 +23,14 @@ define void @load_store_factor2_i32(ptr %p) { ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP8]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = add [[TMP10]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP15:%.*]] = add [[TMP11]], 
shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP12]], [[TMP15]]) -; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP9]], align 4 +; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -69,15 +68,14 @@ define void @load_store_factor2_i32(ptr %p) { ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; FIXED-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]] -; FIXED-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4 +; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP2]], align 4 ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> ; FIXED-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], ; FIXED-NEXT: [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], ; FIXED-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP7]], <16 x i32> ; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> -; FIXED-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 4 +; FIXED-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -123,15 +121,14 @@ define void @load_store_factor2_i32(ptr %p) { ; 
SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]] -; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 -; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 4 +; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP8]], align 4 ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) ; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; SCALABLE-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 ; SCALABLE-NEXT: [[TMP12:%.*]] = add [[TMP10]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP15:%.*]] = add [[TMP11]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP12]], [[TMP15]]) -; SCALABLE-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP9]], align 4 +; SCALABLE-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -203,15 +200,14 @@ define void @load_store_factor2_i64(ptr %p) { ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 8 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP8]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv4i64( [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: 
[[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = add [[TMP10]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP15:%.*]] = add [[TMP11]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv4i64( [[TMP12]], [[TMP15]]) -; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP9]], align 8 +; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -249,15 +245,14 @@ define void @load_store_factor2_i64(ptr %p) { ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; FIXED-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]] -; FIXED-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP3]], align 8 +; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> ; FIXED-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], ; FIXED-NEXT: [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], ; FIXED-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP7]], <8 x i32> ; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> -; FIXED-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; FIXED-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP10:%.*]] = icmp eq i64 
[[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -303,15 +298,14 @@ define void @load_store_factor2_i64(ptr %p) { ; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]] -; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 8 +; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP8]], align 8 ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv4i64( [[WIDE_VEC]]) ; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; SCALABLE-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 ; SCALABLE-NEXT: [[TMP12:%.*]] = add [[TMP10]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP15:%.*]] = add [[TMP11]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv4i64( [[TMP12]], [[TMP15]]) -; SCALABLE-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP9]], align 8 +; SCALABLE-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP8]], align 8 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -374,8 +368,7 @@ define void @load_store_factor3_i32(ptr %p) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load 
<24 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> @@ -386,7 +379,7 @@ define void @load_store_factor3_i32(ptr %p) { ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP13]], <24 x i32> poison, <24 x i32> -; CHECK-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 4 +; CHECK-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -428,8 +421,7 @@ define void @load_store_factor3_i32(ptr %p) { ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; FIXED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]] -; FIXED-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP3]], align 4 +; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP2]], align 4 ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> @@ -440,7 +432,7 @@ define void @load_store_factor3_i32(ptr %p) { ; FIXED-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x 
i32> poison, <16 x i32> ; FIXED-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> ; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP13]], <24 x i32> poison, <24 x i32> -; FIXED-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 4 +; FIXED-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -482,8 +474,7 @@ define void @load_store_factor3_i32(ptr %p) { ; SCALABLE-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 ; SCALABLE-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]] -; SCALABLE-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0 -; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP3]], align 4 +; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <24 x i32>, ptr [[TMP2]], align 4 ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> @@ -494,7 +485,7 @@ define void @load_store_factor3_i32(ptr %p) { ; SCALABLE-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> ; SCALABLE-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <24 x i32> [[TMP13]], <24 x i32> poison, <24 x i32> -; SCALABLE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 4 +; SCALABLE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; 
SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -567,8 +558,7 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> @@ -579,7 +569,7 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP13]], <12 x i64> poison, <12 x i32> -; CHECK-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; CHECK-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -621,8 +611,7 @@ define void @load_store_factor3_i64(ptr %p) { ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; FIXED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]] -; FIXED-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 
-; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP3]], align 8 +; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> @@ -633,7 +622,7 @@ define void @load_store_factor3_i64(ptr %p) { ; FIXED-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> ; FIXED-NEXT: [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> ; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP13]], <12 x i64> poison, <12 x i32> -; FIXED-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; FIXED-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -675,8 +664,7 @@ define void @load_store_factor3_i64(ptr %p) { ; SCALABLE-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 ; SCALABLE-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]] -; SCALABLE-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 -; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP3]], align 8 +; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <12 x i64>, ptr [[TMP2]], align 8 ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> 
@@ -687,7 +675,7 @@ define void @load_store_factor3_i64(ptr %p) { ; SCALABLE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> ; SCALABLE-NEXT: [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i64> [[TMP13]], <12 x i64> poison, <12 x i32> -; SCALABLE-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; SCALABLE-NEXT: store <12 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -760,8 +748,7 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> @@ -786,7 +773,7 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> [[TMP24]], <8 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i64> [[TMP25]], <8 x i64> [[TMP26]], <16 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP27]], <16 x i64> poison, <16 x i32> -; CHECK-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; CHECK-NEXT: store <16 x i64> 
[[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -853,8 +840,7 @@ define void @load_store_factor8(ptr %p) { ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; FIXED-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 3 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]] -; FIXED-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP3]], align 8 +; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; FIXED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> @@ -879,7 +865,7 @@ define void @load_store_factor8(ptr %p) { ; FIXED-NEXT: [[TMP26:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> [[TMP24]], <8 x i32> ; FIXED-NEXT: [[TMP27:%.*]] = shufflevector <8 x i64> [[TMP25]], <8 x i64> [[TMP26]], <16 x i32> ; FIXED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP27]], <16 x i64> poison, <16 x i32> -; FIXED-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; FIXED-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; FIXED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -946,8 +932,7 @@ define void @load_store_factor8(ptr %p) { ; SCALABLE-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 3 ; SCALABLE-NEXT: [[TMP2:%.*]] = getelementptr i64, 
ptr [[P:%.*]], i64 [[TMP1]] -; SCALABLE-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 -; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP3]], align 8 +; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP2]], align 8 ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> ; SCALABLE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> @@ -972,7 +957,7 @@ define void @load_store_factor8(ptr %p) { ; SCALABLE-NEXT: [[TMP26:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> [[TMP24]], <8 x i32> ; SCALABLE-NEXT: [[TMP27:%.*]] = shufflevector <8 x i64> [[TMP25]], <8 x i64> [[TMP26]], <16 x i32> ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i64> [[TMP27]], <16 x i64> poison, <16 x i32> -; SCALABLE-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 8 +; SCALABLE-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; SCALABLE-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; SCALABLE-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1109,8 +1094,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP8]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = 
extractvalue { , } [[STRIDED_VEC]], 1 @@ -1157,12 +1141,10 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; FIXED-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1 ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP2]] ; FIXED-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP3]] -; FIXED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 +; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4 ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> ; FIXED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> -; FIXED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; FIXED-NEXT: [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4 +; FIXED-NEXT: [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> ; FIXED-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> ; FIXED-NEXT: [[TMP8:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC3]] @@ -1216,8 +1198,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]] -; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0 -; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 4 +; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP8]], align 4 ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) ; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; SCALABLE-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 @@ -1296,8 
+1277,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 8 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP8]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv4i64( [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 @@ -1344,12 +1324,10 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; FIXED-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1 ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]] ; FIXED-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]] -; FIXED-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0 -; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP6]], align 8 +; FIXED-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP4]], align 8 ; FIXED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> ; FIXED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> -; FIXED-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 -; FIXED-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i64>, ptr [[TMP7]], align 8 +; FIXED-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i64>, ptr [[TMP5]], align 8 ; FIXED-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i64> [[WIDE_VEC1]], <8 x i64> poison, <4 x i32> ; FIXED-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i64> [[WIDE_VEC1]], <8 x i64> poison, <4 x i32> ; FIXED-NEXT: [[TMP8:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC3]] @@ -1403,8 +1381,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; 
SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]] -; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 8 +; SCALABLE-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP8]], align 8 ; SCALABLE-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv4i64( [[WIDE_VEC]]) ; SCALABLE-NEXT: [[TMP10:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; SCALABLE-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll index 7bfd2eaad5741cd..fa346b4eac02d43 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll @@ -1,33 +1,31 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -force-vector-width=2 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_2 -; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -force-vector-width=4 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_4 -; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -force-vector-width=8 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_8 -; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -force-vector-width=16 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_16 +; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s %i8.2 = type {i8, i8} define void @i8_factor_2(ptr %data, i64 %n) { entry: br label %for.body -; VF_2-LABEL: Checking a loop in 'i8_factor_2' -; VF_2: Found an estimated cost of 2 for VF 2 For 
instruction: %l0 = load i8, ptr %p0, align 1 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %l1 = load i8, ptr %p1, align 1 -; VF_2: Found an estimated cost of 0 for VF 2 For instruction: store i8 %a0, ptr %p0, align 1 -; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i8 %a1, ptr %p1, align 1 -; VF_4-LABEL: Checking a loop in 'i8_factor_2' -; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %l0 = load i8, ptr %p0, align 1 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %l1 = load i8, ptr %p1, align 1 -; VF_4: Found an estimated cost of 0 for VF 4 For instruction: store i8 %a0, ptr %p0, align 1 -; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i8 %a1, ptr %p1, align 1 -; VF_8-LABEL: Checking a loop in 'i8_factor_2' -; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %l0 = load i8, ptr %p0, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %l1 = load i8, ptr %p1, align 1 -; VF_8: Found an estimated cost of 0 for VF 8 For instruction: store i8 %a0, ptr %p0, align 1 -; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 %a1, ptr %p1, align 1 -; VF_16-LABEL: Checking a loop in 'i8_factor_2' -; VF_16: Found an estimated cost of 3 for VF 16 For instruction: %l0 = load i8, ptr %p0, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %l1 = load i8, ptr %p1, align 1 -; VF_16: Found an estimated cost of 0 for VF 16 For instruction: store i8 %a0, ptr %p0, align 1 -; VF_16-NEXT: Found an estimated cost of 3 for VF 16 For instruction: store i8 %a1, ptr %p1, align 1 +; CHECK-LABEL: Checking a loop in 'i8_factor_2' +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with 
factor 2 at , ir<%p0> +; CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 5 for VF 32: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 5 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 2 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 2 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 2 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 5 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 5 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0 @@ -50,34 +48,17 @@ for.end: define void @i8_factor_3(ptr %data, i64 %n) { entry: br label %for.body -; VF_2-LABEL: Checking a loop in 'i8_factor_3' -; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %l0 = load i8, ptr %p0, align 1 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %l1 = load i8, ptr %p1, align 1 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %l2 = load i8, ptr %p2, align 1 -; VF_2: Found an estimated cost of 0 for VF 2 For instruction: store i8 %a0, ptr %p0, align 1 -; VF_2: 
Found an estimated cost of 0 for VF 2 For instruction: store i8 %a1, ptr %p1, align 1 -; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i8 %a2, ptr %p2, align 1 -; VF_4-LABEL: Checking a loop in 'i8_factor_3' -; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %l0 = load i8, ptr %p0, align 1 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %l1 = load i8, ptr %p1, align 1 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %l2 = load i8, ptr %p2, align 1 -; VF_4: Found an estimated cost of 0 for VF 4 For instruction: store i8 %a0, ptr %p0, align 1 -; VF_4: Found an estimated cost of 0 for VF 4 For instruction: store i8 %a1, ptr %p1, align 1 -; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i8 %a2, ptr %p2, align 1 -; VF_8-LABEL: Checking a loop in 'i8_factor_3' -; VF_8: Found an estimated cost of 3 for VF 8 For instruction: %l0 = load i8, ptr %p0, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %l1 = load i8, ptr %p1, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %l2 = load i8, ptr %p2, align 1 -; VF_8: Found an estimated cost of 0 for VF 8 For instruction: store i8 %a0, ptr %p0, align 1 -; VF_8: Found an estimated cost of 0 for VF 8 For instruction: store i8 %a1, ptr %p1, align 1 -; VF_8-NEXT: Found an estimated cost of 3 for VF 8 For instruction: store i8 %a2, ptr %p2, align 1 -; VF_16-LABEL: Checking a loop in 'i8_factor_3' -; VF_16: Found an estimated cost of 5 for VF 16 For instruction: %l0 = load i8, ptr %p0, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %l1 = load i8, ptr %p1, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %l2 = load i8, ptr %p2, align 1 -; VF_16: Found an estimated cost of 0 for VF 16 For instruction: store i8 %a0, ptr %p0, align 1 -; VF_16: Found an estimated cost of 0 for VF 16 For instruction: store i8 %a1, ptr %p1, 
align 1 -; VF_16-NEXT: Found an estimated cost of 5 for VF 16 For instruction: store i8 %a2, ptr %p2, align 1 +; CHECK-LABEL: Checking a loop in 'i8_factor_3' +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 9 for VF 32: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 9 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 0 @@ -99,3 +80,240 @@ for.body: for.end: ret void } + +%i8.4 = type {i8, i8, i8, i8} +define void @i8_factor_4(ptr %data, i64 %n) { +entry: + br label %for.body +; CHECK-LABEL: Checking a loop in 'i8_factor_4' +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 9 for VF 32: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 9 for VF 32: INTERLEAVE-GROUP with factor 4 at , 
ir<%p0> +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %p0 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 0 + %p1 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 1 + %p2 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 2 + %p3 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 3 + %l0 = load i8, ptr %p0, align 1 + %l1 = load i8, ptr %p1, align 1 + %l2 = load i8, ptr %p2, align 1 + %l3 = load i8, ptr %p3, align 1 + %a0 = add i8 %l0, 1 + %a1 = add i8 %l1, 2 + %a2 = add i8 %l2, 3 + %a3 = add i8 %l3, 4 + store i8 %a0, ptr %p0, align 1 + store i8 %a1, ptr %p1, align 1 + store i8 %a2, ptr %p2, align 1 + store i8 %a3, ptr %p3, align 1 + %i.next = add nuw nsw i64 %i, 1 + %cond = icmp slt i64 %i.next, %n + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +} + +%i8.5 = type {i8, i8, i8, i8, i8} +define void @i8_factor_5(ptr %data, i64 %n) { +entry: + br label %for.body +; CHECK-LABEL: Checking a loop in 'i8_factor_5' +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%p0> +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%p0> +; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> +; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%p0> +; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> +; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%p0> +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %p0 = getelementptr inbounds %i8.5, ptr %data, i64 %i, i32 0 + %p1 = getelementptr inbounds %i8.5, ptr %data, i64 %i, i32 1 + %p2 = getelementptr inbounds %i8.5, ptr %data, i64 %i, i32 2 + %p3 = getelementptr inbounds %i8.5, ptr %data, i64 %i, i32 3 + %p4 = getelementptr inbounds %i8.5, ptr %data, i64 %i, i32 4 + %l0 = load i8, ptr %p0, 
align 1 + %l1 = load i8, ptr %p1, align 1 + %l2 = load i8, ptr %p2, align 1 + %l3 = load i8, ptr %p3, align 1 + %l4 = load i8, ptr %p4, align 1 + %a0 = add i8 %l0, 1 + %a1 = add i8 %l1, 2 + %a2 = add i8 %l2, 3 + %a3 = add i8 %l3, 4 + %a4 = add i8 %l4, 5 + store i8 %a0, ptr %p0, align 1 + store i8 %a1, ptr %p1, align 1 + store i8 %a2, ptr %p2, align 1 + store i8 %a3, ptr %p3, align 1 + store i8 %a4, ptr %p4, align 1 + %i.next = add nuw nsw i64 %i, 1 + %cond = icmp slt i64 %i.next, %n + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +} + +%i8.6 = type {i8, i8, i8, i8, i8, i8} +define void @i8_factor_6(ptr %data, i64 %n) { +entry: + br label %for.body +; CHECK-LABEL: Checking a loop in 'i8_factor_6' +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%p0> +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%p0> +; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> +; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%p0> +; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> +; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%p0> +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %p0 = getelementptr inbounds %i8.6, ptr %data, i64 %i, i32 0 + %p1 = getelementptr inbounds %i8.6, ptr %data, i64 %i, i32 1 + %p2 = getelementptr inbounds %i8.6, ptr %data, i64 %i, i32 2 + %p3 = getelementptr inbounds %i8.6, ptr %data, i64 %i, i32 3 + %p4 = getelementptr inbounds %i8.6, ptr %data, i64 %i, i32 4 + %p5 = getelementptr inbounds %i8.6, ptr %data, i64 %i, i32 5 + %l0 = load i8, ptr %p0, align 1 + %l1 = load i8, ptr %p1, align 1 + %l2 = load i8, ptr %p2, align 1 + %l3 = load i8, ptr %p3, align 1 + %l4 = load i8, ptr %p4, align 1 + %l5 = load i8, ptr %p5, align 1 + %a0 = add i8 %l0, 
1 + %a1 = add i8 %l1, 2 + %a2 = add i8 %l2, 3 + %a3 = add i8 %l3, 4 + %a4 = add i8 %l4, 5 + %a5 = add i8 %l5, 6 + store i8 %a0, ptr %p0, align 1 + store i8 %a1, ptr %p1, align 1 + store i8 %a2, ptr %p2, align 1 + store i8 %a3, ptr %p3, align 1 + store i8 %a4, ptr %p4, align 1 + store i8 %a5, ptr %p5, align 1 + %i.next = add nuw nsw i64 %i, 1 + %cond = icmp slt i64 %i.next, %n + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +} + +%i8.7 = type {i8, i8, i8, i8, i8, i8, i8} +define void @i8_factor_7(ptr %data, i64 %n) { +entry: + br label %for.body +; CHECK-LABEL: Checking a loop in 'i8_factor_7' +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%p0> +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%p0> +; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> +; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%p0> +; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> +; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%p0> +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %p0 = getelementptr inbounds %i8.7, ptr %data, i64 %i, i32 0 + %p1 = getelementptr inbounds %i8.7, ptr %data, i64 %i, i32 1 + %p2 = getelementptr inbounds %i8.7, ptr %data, i64 %i, i32 2 + %p3 = getelementptr inbounds %i8.7, ptr %data, i64 %i, i32 3 + %p4 = getelementptr inbounds %i8.7, ptr %data, i64 %i, i32 4 + %p5 = getelementptr inbounds %i8.7, ptr %data, i64 %i, i32 5 + %p6 = getelementptr inbounds %i8.7, ptr %data, i64 %i, i32 6 + %l0 = load i8, ptr %p0, align 1 + %l1 = load i8, ptr %p1, align 1 + %l2 = load i8, ptr %p2, align 1 + %l3 = load i8, ptr %p3, align 1 + %l4 = load i8, ptr %p4, align 1 + %l5 = load i8, ptr %p5, align 1 + %l6 = load i8, ptr %p6, align 1 + %a0 = add i8 %l0, 1 + %a1 
= add i8 %l1, 2 + %a2 = add i8 %l2, 3 + %a3 = add i8 %l3, 4 + %a4 = add i8 %l4, 5 + %a5 = add i8 %l5, 6 + %a6 = add i8 %l6, 7 + store i8 %a0, ptr %p0, align 1 + store i8 %a1, ptr %p1, align 1 + store i8 %a2, ptr %p2, align 1 + store i8 %a3, ptr %p3, align 1 + store i8 %a4, ptr %p4, align 1 + store i8 %a5, ptr %p5, align 1 + store i8 %a6, ptr %p6, align 1 + %i.next = add nuw nsw i64 %i, 1 + %cond = icmp slt i64 %i.next, %n + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +} + +%i8.8 = type {i8, i8, i8, i8, i8, i8, i8, i8} +define void @i8_factor_8(ptr %data, i64 %n) { +entry: + br label %for.body +; CHECK-LABEL: Checking a loop in 'i8_factor_8' +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> +; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%p0> +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%p0> +; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> +; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%p0> +; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> +; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%p0> +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %p0 = getelementptr inbounds %i8.8, ptr %data, i64 %i, i32 0 + %p1 = getelementptr inbounds %i8.8, ptr %data, i64 %i, i32 1 + %p2 = getelementptr inbounds %i8.8, ptr %data, i64 %i, i32 2 + %p3 = getelementptr inbounds %i8.8, ptr %data, i64 %i, i32 3 + %p4 = getelementptr inbounds %i8.8, ptr %data, i64 %i, i32 4 + %p5 = getelementptr inbounds %i8.8, ptr %data, i64 %i, i32 5 + %p6 = getelementptr inbounds %i8.8, ptr %data, i64 %i, i32 6 + %p7 = getelementptr inbounds %i8.8, ptr %data, i64 %i, i32 7 + %l0 = load i8, ptr %p0, align 1 + %l1 = load i8, ptr %p1, align 1 + %l2 = load i8, ptr %p2, align 1 + %l3 = load i8, ptr %p3, align 1 + %l4 = load i8, 
ptr %p4, align 1 + %l5 = load i8, ptr %p5, align 1 + %l6 = load i8, ptr %p6, align 1 + %l7 = load i8, ptr %p7, align 1 + %a0 = add i8 %l0, 1 + %a1 = add i8 %l1, 2 + %a2 = add i8 %l2, 3 + %a3 = add i8 %l3, 4 + %a4 = add i8 %l4, 5 + %a5 = add i8 %l5, 6 + %a6 = add i8 %l6, 7 + %a7 = add i8 %l7, 8 + store i8 %a0, ptr %p0, align 1 + store i8 %a1, ptr %p1, align 1 + store i8 %a2, ptr %p2, align 1 + store i8 %a3, ptr %p3, align 1 + store i8 %a4, ptr %p4, align 1 + store i8 %a5, ptr %p5, align 1 + store i8 %a6, ptr %p6, align 1 + store i8 %a7, ptr %p7, align 1 + %i.next = add nuw nsw i64 %i, 1 + %cond = icmp slt i64 %i.next, %n + br i1 %cond, label %for.body, label %for.end + +for.end: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 99b8cb7ae94b951..437ba4be943baf5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -184,8 +184,7 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK-NEXT: [[TMP16:%.*]] = mul [[TMP15]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP16]] ; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[VECTOR_GEP]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP18]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP17]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: [[TMP20:%.*]] = add [[TMP19]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll 
b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll index 92af164f1c579e4..8ba47e17cab71ab 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll @@ -110,13 +110,11 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; NO-VP-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] ; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i32], ptr [[B:%.*]], i64 [[TMP6]], i32 0 ; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[TMP11]], i32 0 -; NO-VP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; NO-VP-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP14]], align 4 +; NO-VP-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP12]], align 4 ; NO-VP-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) ; NO-VP-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; NO-VP-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; NO-VP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 -; NO-VP-NEXT: [[WIDE_VEC1:%.*]] = load , ptr [[TMP17]], align 4 +; NO-VP-NEXT: [[WIDE_VEC1:%.*]] = load , ptr [[TMP13]], align 4 ; NO-VP-NEXT: [[STRIDED_VEC2:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC1]]) ; NO-VP-NEXT: [[TMP18:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 ; NO-VP-NEXT: [[TMP19:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 26601e6192bb01f..8e5bf27acc64f1b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -557,17 +557,13 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP8]] ; 
CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP11]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP12]], i32 0 -; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <16 x double>, ptr [[TMP16]], align 8 +; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <16 x double>, ptr [[TMP12]], align 8 ; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x double> [[WIDE_VEC1]], <16 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP13]], i32 0 -; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <16 x double>, ptr [[TMP17]], align 8 +; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <16 x double>, ptr [[TMP13]], align 8 ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x double> [[WIDE_VEC2]], <16 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i32 0 -; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <16 x double>, ptr [[TMP18]], align 8 +; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <16 x double>, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <16 x double> [[WIDE_VEC3]], <16 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = fcmp oeq <4 x double> [[STRIDED_VEC]], zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = fcmp oeq <4 x double> [[STRIDED_VEC4]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index 9383799b181c82b..7d1d326641e1244 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ 
-93,20 +93,18 @@ define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noali ; CHECK-NEXT: [[TMP42:%.*]] = load float, ptr [[P_INVAR]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <2 x float> poison, float [[TMP42]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT28:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT27]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP45:%.*]] = getelementptr float, ptr [[TMP44]], i32 0 ; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT28]], <4 x i32> ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <4 x float> [[TMP46]], <4 x float> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP47]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP45]], align 4 +; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP44]], align 4 ; CHECK-NEXT: [[TMP48:%.*]] = load float, ptr [[P_INVAR]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT29:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT30:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT29]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP49:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[TMP41]] -; CHECK-NEXT: [[TMP50:%.*]] = getelementptr float, ptr [[TMP49]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT36:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLAT30]], <2 x float> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLAT36]], <4 x float> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC31:%.*]] = shufflevector <8 x float> [[TMP51]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC31]], ptr [[TMP50]], align 4 +; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC31]], ptr [[TMP49]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw 
i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP53:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP53]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -247,8 +245,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP24]], 4 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP26]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP29]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP26]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC15:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> @@ -265,12 +262,11 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-NEXT: [[TMP35:%.*]] = fmul <2 x float> [[TMP34]], zeroinitializer ; CHECK-NEXT: [[TMP36:%.*]] = fadd <2 x float> [[STRIDED_VEC16]], [[STRIDED_VEC20]] ; CHECK-NEXT: [[TMP37:%.*]] = fmul <2 x float> [[TMP36]], zeroinitializer -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr float, ptr [[TMP28]], i32 0 ; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <2 x float> [[TMP31]], <2 x float> [[TMP33]], <4 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <2 x float> [[TMP35]], <2 x float> [[TMP37]], <4 x i32> ; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x float> [[TMP40]], <4 x float> [[TMP41]], <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP42]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP39]], align 4 +; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr 
[[TMP28]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -484,14 +480,12 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP51:%.*]] = lshr exact i64 [[TMP50]], 1 ; CHECK-NEXT: [[TMP52:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP51]] -; CHECK-NEXT: [[TMP53:%.*]] = getelementptr i32, ptr [[TMP52]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP53]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP52]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC34:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP50]] ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[B]], <4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> , <4 x i32> poison), !alias.scope [[META6:![0-9]+]] -; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP56]], i32 0 ; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC34]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer, <8 x i32> @@ -499,7 +493,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <8 x i32> [[TMP60]], <8 x i32> zeroinitializer, <16 x i32> ; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> 
[[TMP62]], <32 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[TMP63]], <32 x i32> poison, <32 x i32> -; CHECK-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP57]], align 4 +; CHECK-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll index 1b0118e137e7352..3a07bcca523ce12 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll @@ -32,10 +32,9 @@ define void @test_pr55375_interleave_opaque_ptr(ptr %start, ptr %end) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr ptr, ptr [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x ptr> zeroinitializer, <2 x ptr> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[TMP12]], <4 x ptr> poison, <4 x i32> -; CHECK-NEXT: store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[TMP11]], align 8 +; CHECK-NEXT: store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll index 
f6b43c4ca7940e8..5b2f2717222f7cc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll @@ -27,8 +27,7 @@ define void @pr63602_1(ptr %arr) { ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX2]], 9 ; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP4]] @@ -44,8 +43,7 @@ define void @pr63602_1(ptr %arr) { ; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw i64 [[TMP1]], 2 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 -; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <12 x i32>, ptr [[TMP20]], align 4 +; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <12 x i32>, ptr [[TMP19]], align 4 ; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i32> [[WIDE_VEC3]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC3]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = add <4 x i32> [[STRIDED_VEC5]], [[STRIDED_VEC4]] @@ -144,8 +142,7 @@ define void @pr63602_2(ptr %arr) { ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX2]], 9 ; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP10]] 
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll index 1452675fdc72e40..717dbe359f1050d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll @@ -22,8 +22,7 @@ define void @avoid_sinking_store_across_load(ptr %arr) { ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[VEC_IND2]] ; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], @@ -31,8 +30,7 @@ define void @avoid_sinking_store_across_load(ptr %arr) { ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[STRIDED_VEC]], ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP8]], <4 x ptr> [[TMP7]], i32 4, <4 x i1> ) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP7]], i32 0 -; 
CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <12 x i32>, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <12 x i32>, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC4]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <12 x i32> [[WIDE_VEC4]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[STRIDED_VEC5]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll index 59b179577dcff0e..c6dfefe5c9ad4fd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll @@ -277,8 +277,7 @@ define void @limit_main_loop_vf_to_avoid_dead_main_vector_loop(ptr noalias %src, ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x i8], ptr [[SRC:%.*]], i64 [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <24 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <24 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i8> [[WIDE_VEC]], <24 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll index d6b780505a47d04..ecae00807f4e722 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll @@ -177,11 
+177,9 @@ define void @test_scalar_cost_single_store_loop_varying_cond(ptr %dst, ptr noali ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX3]], 16 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i32> [[WIDE_VEC4]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC]], ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC5]], diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll b/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll index 8f3ca115af514e1..4517583965ea4ff 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll @@ -23,14 +23,12 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon ; SSE2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; SSE2-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1 ; SSE2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[S1:%.*]], i64 [[TMP1]] -; SSE2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0 -; SSE2-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 +; SSE2-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2 ; SSE2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> ; SSE2-NEXT: 
[[STRIDED_VEC1:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x i32> ; SSE2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[S2:%.*]], i64 [[TMP1]] -; SSE2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 -; SSE2-NEXT: [[WIDE_VEC2:%.*]] = load <8 x i16>, ptr [[TMP6]], align 2 +; SSE2-NEXT: [[WIDE_VEC2:%.*]] = load <8 x i16>, ptr [[TMP5]], align 2 ; SSE2-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i16> [[WIDE_VEC2]], <8 x i16> poison, <4 x i32> ; SSE2-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i16> [[WIDE_VEC2]], <8 x i16> poison, <4 x i32> ; SSE2-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[STRIDED_VEC3]] to <4 x i32> @@ -100,24 +98,20 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon ; SSE41-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP1]], 1 ; SSE41-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[S1:%.*]], i64 [[TMP2]] ; SSE41-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[S1]], i64 [[TMP3]] -; SSE41-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0 -; SSE41-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, ptr [[TMP6]], align 2 +; SSE41-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, ptr [[TMP4]], align 2 ; SSE41-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> ; SSE41-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> -; SSE41-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 -; SSE41-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i16>, ptr [[TMP7]], align 2 +; SSE41-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i16>, ptr [[TMP5]], align 2 ; SSE41-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i16> [[WIDE_VEC1]], <8 x i16> poison, <4 x i32> ; SSE41-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i16> [[WIDE_VEC1]], <8 x i16> poison, <4 x i32> ; SSE41-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x 
i32> ; SSE41-NEXT: [[TMP9:%.*]] = sext <4 x i16> [[STRIDED_VEC2]] to <4 x i32> ; SSE41-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[S2:%.*]], i64 [[TMP2]] ; SSE41-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[S2]], i64 [[TMP3]] -; SSE41-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0 -; SSE41-NEXT: [[WIDE_VEC5:%.*]] = load <8 x i16>, ptr [[TMP12]], align 2 +; SSE41-NEXT: [[WIDE_VEC5:%.*]] = load <8 x i16>, ptr [[TMP10]], align 2 ; SSE41-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <8 x i16> [[WIDE_VEC5]], <8 x i16> poison, <4 x i32> ; SSE41-NEXT: [[STRIDED_VEC9:%.*]] = shufflevector <8 x i16> [[WIDE_VEC5]], <8 x i16> poison, <4 x i32> -; SSE41-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0 -; SSE41-NEXT: [[WIDE_VEC6:%.*]] = load <8 x i16>, ptr [[TMP13]], align 2 +; SSE41-NEXT: [[WIDE_VEC6:%.*]] = load <8 x i16>, ptr [[TMP11]], align 2 ; SSE41-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <8 x i16> [[WIDE_VEC6]], <8 x i16> poison, <4 x i32> ; SSE41-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <8 x i16> [[WIDE_VEC6]], <8 x i16> poison, <4 x i32> ; SSE41-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[STRIDED_VEC7]] to <4 x i32> @@ -201,20 +195,16 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon ; AVX1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[S1]], i64 [[TMP5]] ; AVX1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[S1]], i64 [[TMP6]] ; AVX1-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[S1]], i64 [[TMP7]] -; AVX1-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0 -; AVX1-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, ptr [[TMP12]], align 2 +; AVX1-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, ptr [[TMP8]], align 2 ; AVX1-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> ; AVX1-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> -; AVX1-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds i16, ptr [[TMP9]], i32 0 -; AVX1-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i16>, ptr [[TMP13]], align 2 +; AVX1-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i16>, ptr [[TMP9]], align 2 ; AVX1-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i16> [[WIDE_VEC1]], <8 x i16> poison, <4 x i32> ; AVX1-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <8 x i16> [[WIDE_VEC1]], <8 x i16> poison, <4 x i32> -; AVX1-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0 -; AVX1-NEXT: [[WIDE_VEC2:%.*]] = load <8 x i16>, ptr [[TMP14]], align 2 +; AVX1-NEXT: [[WIDE_VEC2:%.*]] = load <8 x i16>, ptr [[TMP10]], align 2 ; AVX1-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <8 x i16> [[WIDE_VEC2]], <8 x i16> poison, <4 x i32> ; AVX1-NEXT: [[STRIDED_VEC9:%.*]] = shufflevector <8 x i16> [[WIDE_VEC2]], <8 x i16> poison, <4 x i32> -; AVX1-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0 -; AVX1-NEXT: [[WIDE_VEC3:%.*]] = load <8 x i16>, ptr [[TMP15]], align 2 +; AVX1-NEXT: [[WIDE_VEC3:%.*]] = load <8 x i16>, ptr [[TMP11]], align 2 ; AVX1-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <8 x i16> [[WIDE_VEC3]], <8 x i16> poison, <4 x i32> ; AVX1-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <8 x i16> [[WIDE_VEC3]], <8 x i16> poison, <4 x i32> ; AVX1-NEXT: [[TMP16:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x i32> @@ -225,20 +215,16 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon ; AVX1-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[S2]], i64 [[TMP5]] ; AVX1-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[S2]], i64 [[TMP6]] ; AVX1-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[S2]], i64 [[TMP7]] -; AVX1-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[TMP20]], i32 0 -; AVX1-NEXT: [[WIDE_VEC11:%.*]] = load <8 x i16>, ptr [[TMP24]], align 2 +; AVX1-NEXT: [[WIDE_VEC11:%.*]] = load <8 x i16>, ptr [[TMP20]], align 2 ; AVX1-NEXT: [[STRIDED_VEC15:%.*]] = shufflevector <8 x i16> [[WIDE_VEC11]], <8 x i16> 
poison, <4 x i32> ; AVX1-NEXT: [[STRIDED_VEC19:%.*]] = shufflevector <8 x i16> [[WIDE_VEC11]], <8 x i16> poison, <4 x i32> -; AVX1-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[TMP21]], i32 0 -; AVX1-NEXT: [[WIDE_VEC12:%.*]] = load <8 x i16>, ptr [[TMP25]], align 2 +; AVX1-NEXT: [[WIDE_VEC12:%.*]] = load <8 x i16>, ptr [[TMP21]], align 2 ; AVX1-NEXT: [[STRIDED_VEC16:%.*]] = shufflevector <8 x i16> [[WIDE_VEC12]], <8 x i16> poison, <4 x i32> ; AVX1-NEXT: [[STRIDED_VEC20:%.*]] = shufflevector <8 x i16> [[WIDE_VEC12]], <8 x i16> poison, <4 x i32> -; AVX1-NEXT: [[TMP26:%.*]] = getelementptr inbounds i16, ptr [[TMP22]], i32 0 -; AVX1-NEXT: [[WIDE_VEC13:%.*]] = load <8 x i16>, ptr [[TMP26]], align 2 +; AVX1-NEXT: [[WIDE_VEC13:%.*]] = load <8 x i16>, ptr [[TMP22]], align 2 ; AVX1-NEXT: [[STRIDED_VEC17:%.*]] = shufflevector <8 x i16> [[WIDE_VEC13]], <8 x i16> poison, <4 x i32> ; AVX1-NEXT: [[STRIDED_VEC21:%.*]] = shufflevector <8 x i16> [[WIDE_VEC13]], <8 x i16> poison, <4 x i32> -; AVX1-NEXT: [[TMP27:%.*]] = getelementptr inbounds i16, ptr [[TMP23]], i32 0 -; AVX1-NEXT: [[WIDE_VEC14:%.*]] = load <8 x i16>, ptr [[TMP27]], align 2 +; AVX1-NEXT: [[WIDE_VEC14:%.*]] = load <8 x i16>, ptr [[TMP23]], align 2 ; AVX1-NEXT: [[STRIDED_VEC18:%.*]] = shufflevector <8 x i16> [[WIDE_VEC14]], <8 x i16> poison, <4 x i32> ; AVX1-NEXT: [[STRIDED_VEC22:%.*]] = shufflevector <8 x i16> [[WIDE_VEC14]], <8 x i16> poison, <4 x i32> ; AVX1-NEXT: [[TMP28:%.*]] = sext <4 x i16> [[STRIDED_VEC15]] to <4 x i32> @@ -329,14 +315,12 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon ; AVX2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; AVX2-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1 ; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[S1:%.*]], i64 [[TMP1]] -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0 -; AVX2-NEXT: [[WIDE_VEC:%.*]] = load <16 x i16>, ptr [[TMP3]], align 2 +; AVX2-NEXT: [[WIDE_VEC:%.*]] = load <16 x 
i16>, ptr [[TMP2]], align 2 ; AVX2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> poison, <8 x i32> ; AVX2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> poison, <8 x i32> ; AVX2-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[STRIDED_VEC]] to <8 x i32> ; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[S2:%.*]], i64 [[TMP1]] -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 -; AVX2-NEXT: [[WIDE_VEC2:%.*]] = load <16 x i16>, ptr [[TMP6]], align 2 +; AVX2-NEXT: [[WIDE_VEC2:%.*]] = load <16 x i16>, ptr [[TMP5]], align 2 ; AVX2-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i16> [[WIDE_VEC2]], <16 x i16> poison, <8 x i32> ; AVX2-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i16> [[WIDE_VEC2]], <16 x i16> poison, <8 x i32> ; AVX2-NEXT: [[TMP7:%.*]] = sext <8 x i16> [[STRIDED_VEC3]] to <8 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll index a9f4b7d7f209cb6..5476ff504edb3de 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll @@ -19,8 +19,7 @@ define void @pr56319(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x i8], ptr [[SRC:%.*]], i64 [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <96 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <96 x i8>, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <96 x i8> 
[[WIDE_VEC]], <96 x i8> poison, <32 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 @@ -39,8 +38,7 @@ define void @pr56319(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x i8], ptr [[SRC]], i64 [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 -; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <6 x i8>, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <6 x i8>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <6 x i8> [[WIDE_VEC2]], <6 x i8> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll index cd6abf5da82e37d..4e9b97ef928b13f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll @@ -445,8 +445,7 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; CHECK-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[TMP6]], 0 ; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[TMP7]], 0 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A:%.*]], i64 0, i64 [[TMP8]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP17]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> ; CHECK-NEXT: 
[[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC1]] @@ -542,8 +541,7 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; MAX-BW-NEXT: [[TMP30:%.*]] = add nuw nsw i64 [[TMP14]], 0 ; MAX-BW-NEXT: [[TMP31:%.*]] = add nuw nsw i64 [[TMP15]], 0 ; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A:%.*]], i64 0, i64 [[TMP16]] -; MAX-BW-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 0 -; MAX-BW-NEXT: [[WIDE_VEC:%.*]] = load <32 x i32>, ptr [[TMP33]], align 4 +; MAX-BW-NEXT: [[WIDE_VEC:%.*]] = load <32 x i32>, ptr [[TMP32]], align 4 ; MAX-BW-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> ; MAX-BW-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> ; MAX-BW-NEXT: [[TMP34:%.*]] = add <16 x i32> [[STRIDED_VEC]], [[STRIDED_VEC1]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll index 0bbf76edde26263..c6a87fa21fb0cc1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll @@ -68,10 +68,9 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] ; CHECK: pred.store.continue14: ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[L_OUT:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0 ; CHECK-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <48 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = and <48 x i1> [[INTERLEAVED_MASK]], -; CHECK-NEXT: call void @llvm.masked.store.v48i8.p0(<48 x i8> , ptr [[TMP14]], i32 1, <48 x i1> [[TMP15]]) +; CHECK-NEXT: call void 
@llvm.masked.store.v48i8.p0(<48 x i8> , ptr [[TMP13]], i32 1, <48 x i1> [[TMP15]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10008 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll index 887feb7c03ef940..5ec7c1d45f8cf14 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll @@ -28,8 +28,7 @@ define void @test(ptr %A) { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[STRIDED_VEC]], ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP13]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll index 8773350bdb42433..953c93756fef45c 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll @@ -18,8 +18,7 @@ define void @gep_for_first_member_does_not_dominate_insert_point(ptr %str, ptr n ; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[TMP1]], 1 ; CHECK-NEXT: 
[[TMP3:%.*]] = getelementptr i8, ptr [[STR]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i32 -1 -; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i8>, ptr [[TMP41]], align 1 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i8> [[STRIDED_VEC2]], [[STRIDED_VEC]] @@ -94,8 +93,7 @@ define void @test_ig_insert_pos_at_end_of_vpbb(ptr noalias %dst, ptr noalias %sr ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr { i16, i16, i16, i16 }, ptr [[SRC]], i64 [[TMP3]], i32 2 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 -4 -; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i16, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i16>, ptr [[TMP51]], align 2 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i16>, ptr [[TMP5]], align 2 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i16> [[STRIDED_VEC]], i32 3 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll index 23a9a654c96f9e4..bb806be15c71ca9 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll @@ -8,7 +8,7 @@ ; YAML-NEXT: Function: test ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' -; YAML-NEXT: - Cost: '0' +; YAML-NEXT: - Cost: '2' ; 
YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '7' diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll index a0cb52a853b7e60..995cd7cfbc880b3 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll @@ -8,7 +8,7 @@ ; YAML: Function: test1 ; YAML: Args: ; YAML: - String: 'Stores SLP vectorized with cost ' -; YAML: - Cost: '4' +; YAML: - Cost: '6' ; YAML: - String: ' and with tree size ' ; YAML: - TreeSize: '5' @@ -47,7 +47,7 @@ declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) ; YAML: Function: test2 ; YAML: Args: ; YAML: - String: 'Stores SLP vectorized with cost ' -; YAML: - Cost: '12' +; YAML: - Cost: '16' ; YAML: - String: ' and with tree size ' ; YAML: - TreeSize: '5' diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll index cbf2924b2819880..18aa5c9e044a98b 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll @@ -316,10 +316,80 @@ out: ret void } +define void @hoist_noalias_addrspace_both(i1 %c, ptr %p, i64 %val) { +; CHECK-LABEL: @hoist_noalias_addrspace_both( +; CHECK-NEXT: if: +; CHECK-NEXT: [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8 +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + %t = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !4 + br label %out + +else: + %e = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !4 + br label %out + +out: + ret void +} + +define void @hoist_noalias_addrspace_one(i1 %c, ptr %p, i64 %val) { +; CHECK-LABEL: @hoist_noalias_addrspace_one( +; CHECK-NEXT: if: +; CHECK-NEXT: [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8 +; CHECK-NEXT: ret 
void +; +if: + br i1 %c, label %then, label %else + +then: + %t = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !4 + br label %out + +else: + %e = atomicrmw add ptr %p, i64 %val seq_cst + br label %out + +out: + ret void +} + +define void @hoist_noalias_addrspace_switch(i64 %i, ptr %p, i64 %val) { +; CHECK-LABEL: @hoist_noalias_addrspace_switch( +; CHECK-NEXT: out: +; CHECK-NEXT: [[T:%.*]] = atomicrmw add ptr [[P:%.*]], i64 [[VAL:%.*]] seq_cst, align 8 +; CHECK-NEXT: ret void +; + switch i64 %i, label %bb0 [ + i64 1, label %bb1 + i64 2, label %bb2 + ] +bb0: + %t = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !4 + br label %out +bb1: + %e = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !5 + br label %out +bb2: + %f = atomicrmw add ptr %p, i64 %val seq_cst, !noalias.addrspace !6 + br label %out +out: + ret void +} + + !0 = !{ i8 0, i8 1 } !1 = !{ i8 3, i8 5 } !2 = !{} !3 = !{ i8 7, i8 9 } +!4 = !{i32 5, i32 6} +!5 = !{i32 5, i32 7} +!6 = !{i32 4, i32 8} + ;. ; CHECK: [[RNG0]] = !{i8 0, i8 1, i8 3, i8 5} ; CHECK: [[RNG1]] = !{i8 0, i8 1, i8 3, i8 5, i8 7, i8 9} diff --git a/llvm/test/Verifier/noalias-addrspace.ll b/llvm/test/Verifier/noalias-addrspace.ll new file mode 100644 index 000000000000000..67a7293d2561cc4 --- /dev/null +++ b/llvm/test/Verifier/noalias-addrspace.ll @@ -0,0 +1,60 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: It should have at least one range! +; CHECK-NEXT: !0 = !{} +define i64 @noalias_addrspace__empty(ptr %ptr, i64 %val) { + %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !0 + ret i64 %ret +} + +; CHECK: Unfinished range! +; CHECK-NEXT: !1 = !{i32 0} +define i64 @noalias_addrspace__single_field(ptr %ptr, i64 %val) { + %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !1 + ret i64 %ret +} + +; CHECK: Range must not be empty! 
+; CHECK-NEXT: !2 = !{i32 0, i32 0} +define i64 @noalias_addrspace__0_0(ptr %ptr, i64 %val) { + %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !2 + ret i64 %ret +} + +; CHECK: noalias.addrspace type must be i32! +; CHECK-NEXT: %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3 +define i64 @noalias_addrspace__i64(ptr %ptr, i64 %val) { + %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !3 + ret i64 %ret +} + +; CHECK: The lower limit must be an integer! +define i64 @noalias_addrspace__fp(ptr %ptr, i64 %val) { + %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !4 + ret i64 %ret +} + +; CHECK: The lower limit must be an integer! +define i64 @noalias_addrspace__ptr(ptr %ptr, i64 %val) { + %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !5 + ret i64 %ret +} + +; CHECK: The lower limit must be an integer! +define i64 @noalias_addrspace__nonconstant(ptr %ptr, i64 %val) { + %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !6 + ret i64 %ret +} + +@gv0 = global i32 0 +@gv1 = global i32 1 + +!0 = !{} +!1 = !{i32 0} +!2 = !{i32 0, i32 0} +!3 = !{i64 1, i64 5} +!4 = !{float 0.0, float 2.0} +!5 = !{ptr null, ptr addrspace(1) null} +!6 = !{i32 ptrtoint (ptr @gv0 to i32), i32 ptrtoint (ptr @gv1 to i32) } + + diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp index 0377791d85b3f87..6d0021c85f20fb7 100644 --- a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -899,7 +899,7 @@ static std::unique_ptr createLTO(IndexWriteCallback OnIndexWrite, std::string OldPrefix, NewPrefix; getThinLTOOldAndNewPrefix(OldPrefix, NewPrefix); Backend = createWriteIndexesThinBackend( - OldPrefix, NewPrefix, + llvm::hardware_concurrency(options::Parallelism), OldPrefix, NewPrefix, // TODO: Add support for optional native object path in // thinlto_prefix_replace option to match lld. 
/*NativeObjectPrefix=*/"", options::thinlto_emit_imports_files, diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index 5dd961a603c9e8b..d4f022ef021a44e 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -346,7 +346,8 @@ static int run(int argc, char **argv) { ThinBackend Backend; if (ThinLTODistributedIndexes) - Backend = createWriteIndexesThinBackend(/*OldPrefix=*/"", + Backend = createWriteIndexesThinBackend(llvm::hardware_concurrency(Threads), + /*OldPrefix=*/"", /*NewPrefix=*/"", /*NativeObjectPrefix=*/"", ThinLTOEmitImports, diff --git a/llvm/tools/llvm-remarkutil/RemarkCounter.cpp b/llvm/tools/llvm-remarkutil/RemarkCounter.cpp index dc0685f342886ab..2d9432e41d9c06a 100644 --- a/llvm/tools/llvm-remarkutil/RemarkCounter.cpp +++ b/llvm/tools/llvm-remarkutil/RemarkCounter.cpp @@ -198,12 +198,8 @@ void ArgumentCounter::collect(const Remark &Remark) { } void RemarkCounter::collect(const Remark &Remark) { - std::optional Key = getGroupByKey(Remark); - if (!Key.has_value()) - return; - auto Iter = CountedByRemarksMap.insert({*Key, 1}); - if (!Iter.second) - Iter.first->second += 1; + if (std::optional Key = getGroupByKey(Remark)) + ++CountedByRemarksMap[*Key]; } Error ArgumentCounter::print(StringRef OutputFileName) { diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp index 158d08f9b77a0af..255f62d77b748d6 100644 --- a/llvm/unittests/IR/ConstantFPRangeTest.cpp +++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp @@ -435,6 +435,7 @@ TEST_F(ConstantFPRangeTest, FPClassify) { EXPECT_EQ(SomePos.getSignBit(), false); EXPECT_EQ(SomeNeg.getSignBit(), true); +#if defined(EXPENSIVE_CHECKS) EnumerateConstantFPRanges( [](const ConstantFPRange &CR) { unsigned Mask = fcNone; @@ -458,6 +459,7 @@ TEST_F(ConstantFPRangeTest, FPClassify) { EXPECT_EQ(Mask, CR.classify()) << CR; }, /*Exhaustive=*/true); +#endif } TEST_F(ConstantFPRangeTest, Print) { @@ -500,6 +502,36 
@@ TEST_F(ConstantFPRangeTest, MismatchedSemantics) { #endif TEST_F(ConstantFPRangeTest, makeAllowedFCmpRegion) { + EXPECT_EQ(ConstantFPRange::makeAllowedFCmpRegion( + FCmpInst::FCMP_OLE, + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0))), + ConstantFPRange::getNonNaN(APFloat::getInf(Sem, /*Negative=*/true), + APFloat(2.0))); + EXPECT_EQ( + ConstantFPRange::makeAllowedFCmpRegion( + FCmpInst::FCMP_OLT, + ConstantFPRange::getNonNaN(APFloat(1.0), + APFloat::getInf(Sem, /*Negative=*/false))), + ConstantFPRange::getNonNaN(APFloat::getInf(Sem, /*Negative=*/true), + APFloat::getLargest(Sem, /*Negative=*/false))); + EXPECT_EQ( + ConstantFPRange::makeAllowedFCmpRegion( + FCmpInst::FCMP_OGT, + ConstantFPRange::getNonNaN(APFloat::getZero(Sem, /*Negative=*/true), + APFloat(2.0))), + ConstantFPRange::getNonNaN(APFloat::getSmallest(Sem, /*Negative=*/false), + APFloat::getInf(Sem, /*Negative=*/false))); + EXPECT_EQ(ConstantFPRange::makeAllowedFCmpRegion( + FCmpInst::FCMP_OGE, + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0))), + ConstantFPRange::getNonNaN( + APFloat(1.0), APFloat::getInf(Sem, /*Negative=*/false))); + EXPECT_EQ(ConstantFPRange::makeAllowedFCmpRegion( + FCmpInst::FCMP_OEQ, + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0))), + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0))); + +#if defined(EXPENSIVE_CHECKS) for (auto Pred : FCmpInst::predicates()) { EnumerateConstantFPRanges( [Pred](const ConstantFPRange &CR) { @@ -529,6 +561,210 @@ TEST_F(ConstantFPRangeTest, makeAllowedFCmpRegion) { }, /*Exhaustive=*/false); } +#endif +} + +TEST_F(ConstantFPRangeTest, makeSatisfyingFCmpRegion) { + EXPECT_EQ(ConstantFPRange::makeSatisfyingFCmpRegion( + FCmpInst::FCMP_OLE, + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0))), + ConstantFPRange::getNonNaN(APFloat::getInf(Sem, /*Negative=*/true), + APFloat(1.0))); + EXPECT_EQ( + ConstantFPRange::makeSatisfyingFCmpRegion( + FCmpInst::FCMP_OLT, ConstantFPRange::getNonNaN( + APFloat::getSmallest(Sem, 
/*Negative=*/false), + APFloat::getInf(Sem, /*Negative=*/false))), + ConstantFPRange::getNonNaN(APFloat::getInf(Sem, /*Negative=*/true), + APFloat::getZero(Sem, /*Negative=*/false))); + EXPECT_EQ( + ConstantFPRange::makeSatisfyingFCmpRegion( + FCmpInst::FCMP_OGT, ConstantFPRange::getNonNaN( + APFloat::getZero(Sem, /*Negative=*/true), + APFloat::getZero(Sem, /*Negative=*/false))), + ConstantFPRange::getNonNaN(APFloat::getSmallest(Sem, /*Negative=*/false), + APFloat::getInf(Sem, /*Negative=*/false))); + EXPECT_EQ(ConstantFPRange::makeSatisfyingFCmpRegion( + FCmpInst::FCMP_OGE, + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0))), + ConstantFPRange::getNonNaN( + APFloat(2.0), APFloat::getInf(Sem, /*Negative=*/false))); + EXPECT_EQ(ConstantFPRange::makeSatisfyingFCmpRegion( + FCmpInst::FCMP_OEQ, + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0))), + ConstantFPRange::getEmpty(Sem)); + EXPECT_EQ(ConstantFPRange::makeSatisfyingFCmpRegion( + FCmpInst::FCMP_OEQ, + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(1.0))), + ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(1.0))); + +#if defined(EXPENSIVE_CHECKS) + for (auto Pred : FCmpInst::predicates()) { + EnumerateConstantFPRanges( + [Pred](const ConstantFPRange &CR) { + ConstantFPRange Res = + ConstantFPRange::makeSatisfyingFCmpRegion(Pred, CR); + // Super set of the optimal set excluding NaNs + ConstantFPRange SuperSet(CR.getSemantics()); + bool ContainsSNaN = false; + bool ContainsQNaN = false; + unsigned NonNaNValsInOptimalSet = 0; + EnumerateValuesInConstantFPRange( + ConstantFPRange::getFull(CR.getSemantics()), + [&](const APFloat &V) { + if (AnyOfValueInConstantFPRange( + CR, + [&](const APFloat &U) { + return !FCmpInst::compare(V, U, Pred); + }, + /*IgnoreNaNPayload=*/true)) { + EXPECT_FALSE(Res.contains(V)) + << "Wrong result for makeSatisfyingFCmpRegion(" << Pred + << ", " << CR << "). 
The result " << Res + << " should not contain " << V; + } else { + if (V.isNaN()) { + if (V.isSignaling()) + ContainsSNaN = true; + else + ContainsQNaN = true; + } else { + SuperSet = SuperSet.unionWith(ConstantFPRange(V)); + ++NonNaNValsInOptimalSet; + } + } + }, + /*IgnoreNaNPayload=*/true); + + // Check optimality + + // The usefullness of making the result optimal for one/une is + // questionable. + if (Pred == FCmpInst::FCMP_ONE || Pred == FCmpInst::FCMP_UNE) + return; + + EXPECT_FALSE(ContainsSNaN && !Res.containsSNaN()) + << "Suboptimal result for makeSatisfyingFCmpRegion(" << Pred + << ", " << CR << "), should contain SNaN, but got " << Res; + EXPECT_FALSE(ContainsQNaN && !Res.containsQNaN()) + << "Suboptimal result for makeSatisfyingFCmpRegion(" << Pred + << ", " << CR << "), should contain QNaN, but got " << Res; + + // We only care about the cases where the result is representable by + // ConstantFPRange. + unsigned NonNaNValsInSuperSet = 0; + EnumerateValuesInConstantFPRange( + SuperSet, + [&](const APFloat &V) { + if (!V.isNaN()) + ++NonNaNValsInSuperSet; + }, + /*IgnoreNaNPayload=*/true); + + if (NonNaNValsInSuperSet == NonNaNValsInOptimalSet) { + ConstantFPRange Optimal = + ConstantFPRange(SuperSet.getLower(), SuperSet.getUpper(), + ContainsQNaN, ContainsSNaN); + EXPECT_EQ(Res, Optimal) + << "Suboptimal result for makeSatisfyingFCmpRegion(" << Pred + << ", " << CR << ")"; + } + }, + /*Exhaustive=*/false); + } +#endif +} + +TEST_F(ConstantFPRangeTest, fcmp) { + std::vector InterestingRanges; + const fltSemantics &Sem = APFloat::Float8E4M3(); + auto FpImm = [&](double V) { + bool ignored; + APFloat APF(V); + APF.convert(Sem, APFloat::rmNearestTiesToEven, &ignored); + return APF; + }; + + InterestingRanges.push_back(ConstantFPRange::getEmpty(Sem)); + InterestingRanges.push_back(ConstantFPRange::getFull(Sem)); + InterestingRanges.push_back(ConstantFPRange::getFinite(Sem)); + InterestingRanges.push_back(ConstantFPRange(FpImm(1.0))); + 
InterestingRanges.push_back( + ConstantFPRange(APFloat::getZero(Sem, /*Negative=*/false))); + InterestingRanges.push_back( + ConstantFPRange(APFloat::getZero(Sem, /*Negative=*/true))); + InterestingRanges.push_back( + ConstantFPRange(APFloat::getInf(Sem, /*Negative=*/false))); + InterestingRanges.push_back( + ConstantFPRange(APFloat::getInf(Sem, /*Negative=*/true))); + InterestingRanges.push_back( + ConstantFPRange(APFloat::getSmallest(Sem, /*Negative=*/false))); + InterestingRanges.push_back( + ConstantFPRange(APFloat::getSmallest(Sem, /*Negative=*/true))); + InterestingRanges.push_back( + ConstantFPRange(APFloat::getLargest(Sem, /*Negative=*/false))); + InterestingRanges.push_back( + ConstantFPRange(APFloat::getLargest(Sem, /*Negative=*/true))); + InterestingRanges.push_back( + ConstantFPRange::getNaNOnly(Sem, /*MayBeQNaN=*/true, /*MayBeSNaN=*/true)); + InterestingRanges.push_back( + ConstantFPRange::getNonNaN(FpImm(0.0), FpImm(1.0))); + InterestingRanges.push_back( + ConstantFPRange::getNonNaN(FpImm(2.0), FpImm(3.0))); + InterestingRanges.push_back( + ConstantFPRange::getNonNaN(FpImm(-1.0), FpImm(1.0))); + InterestingRanges.push_back( + ConstantFPRange::getNonNaN(FpImm(-1.0), FpImm(-0.0))); + InterestingRanges.push_back(ConstantFPRange::getNonNaN( + APFloat::getInf(Sem, /*Negative=*/true), FpImm(-1.0))); + InterestingRanges.push_back(ConstantFPRange::getNonNaN( + FpImm(1.0), APFloat::getInf(Sem, /*Negative=*/false))); + + for (auto &LHS : InterestingRanges) { + for (auto &RHS : InterestingRanges) { + for (auto Pred : FCmpInst::predicates()) { + if (LHS.fcmp(Pred, RHS)) { + EnumerateValuesInConstantFPRange( + LHS, + [&](const APFloat &LHSC) { + EnumerateValuesInConstantFPRange( + RHS, + [&](const APFloat &RHSC) { + EXPECT_TRUE(FCmpInst::compare(LHSC, RHSC, Pred)) + << LHS << " " << Pred << " " << RHS + << " doesn't hold"; + }, + /*IgnoreNaNPayload=*/true); + }, + /*IgnoreNaNPayload=*/true); + } + } + } + } +} + +TEST_F(ConstantFPRangeTest, makeExactFCmpRegion) { 
+ for (auto Pred : FCmpInst::predicates()) { + EnumerateValuesInConstantFPRange( + ConstantFPRange::getFull(APFloat::Float8E4M3()), + [Pred](const APFloat &V) { + std::optional Res = + ConstantFPRange::makeExactFCmpRegion(Pred, V); + ConstantFPRange Allowed = + ConstantFPRange::makeAllowedFCmpRegion(Pred, ConstantFPRange(V)); + ConstantFPRange Satisfying = + ConstantFPRange::makeSatisfyingFCmpRegion(Pred, + ConstantFPRange(V)); + if (Allowed == Satisfying) + EXPECT_EQ(Res, Allowed) << "Wrong result for makeExactFCmpRegion(" + << Pred << ", " << V << ")."; + else + EXPECT_FALSE(Res.has_value()) + << "Wrong result for makeExactFCmpRegion(" << Pred << ", " << V + << ")."; + }, + /*IgnoreNaNPayload=*/true); + } } } // anonymous namespace diff --git a/llvm/unittests/SandboxIR/UtilsTest.cpp b/llvm/unittests/SandboxIR/UtilsTest.cpp index a803c2a1cf977c2..90396eaa53ab380 100644 --- a/llvm/unittests/SandboxIR/UtilsTest.cpp +++ b/llvm/unittests/SandboxIR/UtilsTest.cpp @@ -119,21 +119,21 @@ define void @foo(ptr %ptr) { [[maybe_unused]] auto *V3L3 = cast(&*It++); // getPointerDiffInBytes - EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(L0, L1, SE, DL), 4); - EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(L0, L2, SE, DL), 8); - EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(L1, L0, SE, DL), -4); - EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(L0, V2L0, SE, DL), 0); + EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(L0, L1, SE), 4); + EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(L0, L2, SE), 8); + EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(L1, L0, SE), -4); + EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(L0, V2L0, SE), 0); - EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(L0, V2L1, SE, DL), 4); - EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(L0, V3L1, SE, DL), 4); - EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(V2L0, V2L2, SE, DL), 8); - EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(V2L0, V2L3, SE, DL), 12); - 
EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(V2L3, V2L0, SE, DL), -12); + EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(L0, V2L1, SE), 4); + EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(L0, V3L1, SE), 4); + EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(V2L0, V2L2, SE), 8); + EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(V2L0, V2L3, SE), 12); + EXPECT_EQ(*sandboxir::Utils::getPointerDiffInBytes(V2L3, V2L0, SE), -12); // atLowerAddress - EXPECT_TRUE(sandboxir::Utils::atLowerAddress(L0, L1, SE, DL)); - EXPECT_FALSE(sandboxir::Utils::atLowerAddress(L1, L0, SE, DL)); - EXPECT_FALSE(sandboxir::Utils::atLowerAddress(L3, V3L3, SE, DL)); + EXPECT_TRUE(sandboxir::Utils::atLowerAddress(L0, L1, SE)); + EXPECT_FALSE(sandboxir::Utils::atLowerAddress(L1, L0, SE)); + EXPECT_FALSE(sandboxir::Utils::atLowerAddress(L3, V3L3, SE)); } TEST_F(UtilsTest, GetExpected) { diff --git a/llvm/unittests/Support/TimeProfilerTest.cpp b/llvm/unittests/Support/TimeProfilerTest.cpp index bb820ec99a393ef..aa1185bae2961fa 100644 --- a/llvm/unittests/Support/TimeProfilerTest.cpp +++ b/llvm/unittests/Support/TimeProfilerTest.cpp @@ -72,4 +72,28 @@ TEST(TimeProfiler, Begin_End_Disabled) { timeTraceProfilerEnd(); } +TEST(TimeProfiler, Instant_Add_Smoke) { + setupProfiler(); + + timeTraceProfilerBegin("sync event", "sync detail"); + timeTraceAddInstantEvent("instant event", [&] { return "instant detail"; }); + timeTraceProfilerEnd(); + + std::string json = teardownProfiler(); + ASSERT_TRUE(json.find(R"("name":"sync event")") != std::string::npos); + ASSERT_TRUE(json.find(R"("detail":"sync detail")") != std::string::npos); + ASSERT_TRUE(json.find(R"("name":"instant event")") != std::string::npos); + ASSERT_TRUE(json.find(R"("detail":"instant detail")") != std::string::npos); +} + +TEST(TimeProfiler, Instant_Not_Added_Smoke) { + setupProfiler(); + + timeTraceAddInstantEvent("instant event", [&] { return "instant detail"; }); + + std::string json = teardownProfiler(); + 
ASSERT_TRUE(json.find(R"("name":"instant event")") == std::string::npos); + ASSERT_TRUE(json.find(R"("detail":"instant detail")") == std::string::npos); +} + } // namespace diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt index 9f1a3409c0c3945..dcd7232db5f60c8 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt @@ -11,4 +11,5 @@ add_llvm_unittest(SandboxVectorizerTests DependencyGraphTest.cpp IntervalTest.cpp LegalityTest.cpp - ) + SeedCollectorTest.cpp +) diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp new file mode 100644 index 000000000000000..36400afeaf4c598 --- /dev/null +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp @@ -0,0 +1,125 @@ +//===- SeedCollectorTest.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/SandboxIR/Function.h" +#include "llvm/SandboxIR/Instruction.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Testing/Support/SupportHelpers.h" +#include "gtest/gtest.h" + +using namespace llvm; + +struct SeedBundleTest : public testing::Test { + LLVMContext C; + std::unique_ptr M; + + void parseIR(LLVMContext &C, const char *IR) { + SMDiagnostic Err; + M = parseAssemblyString(IR, Err, C); + if (!M) + Err.print("LegalityTest", errs()); + } +}; + +TEST_F(SeedBundleTest, SeedBundle) { + parseIR(C, R"IR( +define void @foo(float %v0, i32 %i0, i16 %i1, i8 %i2) { +bb: + %add0 = fadd float %v0, %v0 + %add1 = fadd float %v0, %v0 + %add2 = add i8 %i2, %i2 + %add3 = add i16 %i1, %i1 + %add4 = add i32 %i0, %i0 + %add5 = add i16 %i1, %i1 + %add6 = add i8 %i2, %i2 + %add7 = add i8 %i2, %i2 + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + auto &F = *Ctx.createFunction(&LLVMF); + DataLayout DL(M->getDataLayout()); + auto *BB = &*F.begin(); + auto It = BB->begin(); + auto *I0 = &*It++; + auto *I1 = &*It++; + // Assume first two instructions are identical in the number of bits. 
+ const unsigned IOBits = sandboxir::Utils::getNumBits(I0, DL); + // Constructor + sandboxir::SeedBundle SBO(I0); + EXPECT_EQ(*SBO.begin(), I0); + // getNumUnusedBits after constructor + EXPECT_EQ(SBO.getNumUnusedBits(), IOBits); + // setUsed + SBO.setUsed(I0); + // allUsed + EXPECT_TRUE(SBO.allUsed()); + // isUsed + EXPECT_TRUE(SBO.isUsed(0)); + // getNumUnusedBits after setUsed + EXPECT_EQ(SBO.getNumUnusedBits(), 0u); + // insertAt + SBO.insertAt(SBO.end(), I1); + EXPECT_NE(*SBO.begin(), I1); + // getNumUnusedBits after insertAt + EXPECT_EQ(SBO.getNumUnusedBits(), IOBits); + // allUsed + EXPECT_FALSE(SBO.allUsed()); + // getFirstUnusedElement + EXPECT_EQ(SBO.getFirstUnusedElementIdx(), 1u); + + SmallVector Insts; + // add2 through add7 + Insts.push_back(&*It++); + Insts.push_back(&*It++); + Insts.push_back(&*It++); + Insts.push_back(&*It++); + Insts.push_back(&*It++); + Insts.push_back(&*It++); + unsigned BundleBits = 0; + for (auto &S : Insts) + BundleBits += sandboxir::Utils::getNumBits(S); + // Ensure the instructions are as expected. + EXPECT_EQ(BundleBits, 88u); + auto Seeds = Insts; + // Constructor + sandboxir::SeedBundle SB1(std::move(Seeds)); + // getNumUnusedBits after constructor + EXPECT_EQ(SB1.getNumUnusedBits(), BundleBits); + // setUsed with index + SB1.setUsed(1); + // getFirstUnusedElementIdx + EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 0u); + SB1.setUsed(unsigned(0)); + // getFirstUnusedElementIdx not at end + EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 2u); + + // getSlice is (StartIdx, MaxVecRegBits, ForcePowerOf2). It's easier to + // compare test cases without the parameter-name comments inline. 
+ auto Slice0 = SB1.getSlice(2, 64, true); + EXPECT_THAT(Slice0, + testing::ElementsAre(Insts[2], Insts[3], Insts[4], Insts[5])); + auto Slice1 = SB1.getSlice(2, 72, true); + EXPECT_THAT(Slice1, + testing::ElementsAre(Insts[2], Insts[3], Insts[4], Insts[5])); + auto Slice2 = SB1.getSlice(2, 80, true); + EXPECT_THAT(Slice2, + testing::ElementsAre(Insts[2], Insts[3], Insts[4], Insts[5])); + + SB1.setUsed(2); + auto Slice3 = SB1.getSlice(3, 64, false); + EXPECT_THAT(Slice3, testing::ElementsAre(Insts[3], Insts[4], Insts[5])); + // getSlice empty case + SB1.setUsed(3); + auto Slice4 = SB1.getSlice(4, /* MaxVecRegBits */ 8, + /* ForcePowerOf2 */ true); + EXPECT_EQ(Slice4.size(), 0u); +} diff --git a/llvm/utils/lit/lit/TestingConfig.py b/llvm/utils/lit/lit/TestingConfig.py index f81b07baeeaed0d..b0d8e7149e553cd 100644 --- a/llvm/utils/lit/lit/TestingConfig.py +++ b/llvm/utils/lit/lit/TestingConfig.py @@ -64,6 +64,8 @@ def fromdefaults(litConfig): "SOURCE_DATE_EPOCH", "GTEST_FILTER", "DFLTCC", + "QEMU_LD_PREFIX", + "QEMU_CPU", ] if sys.platform.startswith("aix"): diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h index d6062bed5c0c0f8..0e6434073437a5f 100644 --- a/mlir/include/mlir-c/Dialect/LLVM.h +++ b/mlir/include/mlir-c/Dialect/LLVM.h @@ -325,7 +325,12 @@ MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDISubprogramAttrGet( MlirAttribute compileUnit, MlirAttribute scope, MlirAttribute name, MlirAttribute linkageName, MlirAttribute file, unsigned int line, unsigned int scopeLine, uint64_t subprogramFlags, MlirAttribute type, - intptr_t nRetainedNodes, MlirAttribute const *retainedNodes); + intptr_t nRetainedNodes, MlirAttribute const *retainedNodes, + intptr_t nAnnotations, MlirAttribute const *annotations); + +/// Creates a LLVM DIAnnotation attribute. +MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIAnnotationAttrGet( + MlirContext ctx, MlirAttribute name, MlirAttribute value); /// Gets the scope from this DISubprogramAttr. 
MLIR_CAPI_EXPORTED MlirAttribute diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index 2da45eba77655b0..80c22a357287ba5 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -586,7 +586,8 @@ def LLVM_DISubprogramAttr : LLVM_Attr<"DISubprogram", "di_subprogram", OptionalParameter<"unsigned">:$scopeLine, OptionalParameter<"DISubprogramFlags">:$subprogramFlags, OptionalParameter<"DISubroutineTypeAttr">:$type, - OptionalArrayRefParameter<"DINodeAttr">:$retainedNodes + OptionalArrayRefParameter<"DINodeAttr">:$retainedNodes, + OptionalArrayRefParameter<"DINodeAttr">:$annotations ); let builders = [ AttrBuilder<(ins @@ -594,11 +595,11 @@ def LLVM_DISubprogramAttr : LLVM_Attr<"DISubprogram", "di_subprogram", "DIScopeAttr":$scope, "StringAttr":$name, "StringAttr":$linkageName, "DIFileAttr":$file, "unsigned":$line, "unsigned":$scopeLine, "DISubprogramFlags":$subprogramFlags, "DISubroutineTypeAttr":$type, - "ArrayRef":$retainedNodes + "ArrayRef":$retainedNodes, "ArrayRef":$annotations ), [{ return $_get($_ctxt, /*recId=*/nullptr, /*isRecSelf=*/false, id, compileUnit, scope, name, linkageName, file, line, scopeLine, - subprogramFlags, type, retainedNodes); + subprogramFlags, type, retainedNodes, annotations); }]> ]; let assemblyFormat = "`<` struct(params) `>`"; @@ -670,6 +671,21 @@ def LLVM_DIImportedEntityAttr : LLVM_Attr<"DIImportedEntity", "di_imported_entit let assemblyFormat = "`<` struct(params) `>`"; } +//===----------------------------------------------------------------------===// +// DIAnnotationAttr +//===----------------------------------------------------------------------===// + +def LLVM_DIAnnotationAttr : LLVM_Attr<"DIAnnotation", + "di_annotation", + /*traits=*/[], "DINodeAttr"> { + let parameters = (ins + "StringAttr":$name, + "StringAttr":$value + ); + + let assemblyFormat = "`<` struct(params) `>`"; +} + 
//===----------------------------------------------------------------------===// // DISubrangeAttr //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td index 847040466a85fd3..361f8e0cf79ec64 100644 --- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td @@ -249,7 +249,7 @@ def ForOp : SCF_Op<"for", let skipDefaultBuilders = 1; let builders = [ OpBuilder<(ins "Value":$lowerBound, "Value":$upperBound, "Value":$step, - CArg<"ValueRange", "std::nullopt">:$iterArgs, + CArg<"ValueRange", "std::nullopt">:$initArgs, CArg<"function_ref", "nullptr">)> ]; @@ -1074,7 +1074,7 @@ def WhileOp : SCF_Op<"while", let regions = (region SizedRegion<1>:$before, SizedRegion<1>:$after); let builders = [ - OpBuilder<(ins "TypeRange":$resultTypes, "ValueRange":$operands, + OpBuilder<(ins "TypeRange":$resultTypes, "ValueRange":$inits, "function_ref":$beforeBuilder, "function_ref":$afterBuilder)> ]; diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp index 03b536d7aad98fb..c7082445dd9c272 100644 --- a/mlir/lib/CAPI/Dialect/LLVM.cpp +++ b/mlir/lib/CAPI/Dialect/LLVM.cpp @@ -303,9 +303,14 @@ MlirAttribute mlirLLVMDISubprogramAttrGet( MlirAttribute compileUnit, MlirAttribute scope, MlirAttribute name, MlirAttribute linkageName, MlirAttribute file, unsigned int line, unsigned int scopeLine, uint64_t subprogramFlags, MlirAttribute type, - intptr_t nRetainedNodes, MlirAttribute const *retainedNodes) { + intptr_t nRetainedNodes, MlirAttribute const *retainedNodes, + intptr_t nAnnotations, MlirAttribute const *annotations) { SmallVector nodesStorage; nodesStorage.reserve(nRetainedNodes); + + SmallVector annotationsStorage; + annotationsStorage.reserve(nAnnotations); + return wrap(DISubprogramAttr::get( unwrap(ctx), cast(unwrap(recId)), isRecSelf, cast(unwrap(id)), @@ -316,6 +321,9 @@ MlirAttribute 
mlirLLVMDISubprogramAttrGet( cast(unwrap(type)), llvm::map_to_vector( unwrapList(nRetainedNodes, retainedNodes, nodesStorage), + [](Attribute a) { return cast(a); }), + llvm::map_to_vector( + unwrapList(nAnnotations, annotations, annotationsStorage), [](Attribute a) { return cast(a); }))); } @@ -375,3 +383,9 @@ MlirAttribute mlirLLVMDIImportedEntityAttrGet( llvm::map_to_vector(unwrapList(nElements, elements, elementsStorage), [](Attribute a) { return cast(a); }))); } + +MlirAttribute mlirLLVMDIAnnotationAttrGet(MlirContext ctx, MlirAttribute name, + MlirAttribute value) { + return wrap(DIAnnotationAttr::get(unwrap(ctx), cast(unwrap(name)), + cast(unwrap(value)))); +} diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index 2b33f3773dc7d18..0ccd4133d3761dc 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -351,39 +351,23 @@ struct SchedBarrierOpLowering : public ConvertOpToLLVMPattern { } // namespace -/// If `input` is a vector of bytes, concatentate those bytes in little-endian -/// order to form a single integer of size 8 * [vector length]. This works -/// around a wart in the AMDGPU intrinsics where operations that logically take -/// vectors of bytes instead integers. Since we do not want to expose this -/// implementation detail to MLIR, we correct for it here. +/// Converts a MFMA vector operand from MLIR AMDGPU dialect convention to ROCDL +/// and LLVM AMDGPU intrinsics convention. /// -/// In addition, convert vectors of LLVM bfloats to vectors of i16, since AMDGPU -/// MFMA intrinsics pre-date the bfloat type. -static Value mfmaConcatIfNeeded(ConversionPatternRewriter &rewriter, - Location loc, Value input) { +/// Specifically: +/// 1. If `input` is a vector of N bytes, bitcast it to a (N * 8)-bit integer. +/// 2. If the element type is bfloat16, bitcast it to i16. 
+static Value convertMFMAVectorOperand(ConversionPatternRewriter &rewriter, + Location loc, Value input) { Type inputType = input.getType(); if (auto vectorType = dyn_cast(inputType)) { if (vectorType.getElementType().isBF16()) return rewriter.create( loc, vectorType.clone(rewriter.getI16Type()), input); - - if (!vectorType.getElementType().isInteger(8)) - return input; - int64_t numBytes = vectorType.getNumElements(); - Type destType = rewriter.getIntegerType(numBytes * 8); - Value result = rewriter.create( - loc, destType, rewriter.getIntegerAttr(destType, 0)); - for (int64_t i = 0; i < numBytes; ++i) { - Value idxConst = createI32Constant(rewriter, loc, i); - Value element = - rewriter.create(loc, input, idxConst); - Value extended = rewriter.create(loc, destType, element); - Value shiftConst = rewriter.create( - loc, destType, rewriter.getIntegerAttr(destType, i * 8)); - Value shifted = rewriter.create(loc, extended, shiftConst); - result = rewriter.create(loc, result, shifted); + if (vectorType.getElementType().isInteger(8)) { + return rewriter.create( + loc, rewriter.getIntegerType(vectorType.getNumElements() * 8), input); } - return result; } return input; } @@ -656,8 +640,8 @@ struct MFMAOpLowering : public ConvertOpToLLVMPattern { OperationState loweredOp(loc, *maybeIntrinsic); loweredOp.addTypes(intrinsicOutType); loweredOp.addOperands( - {mfmaConcatIfNeeded(rewriter, loc, adaptor.getSourceA()), - mfmaConcatIfNeeded(rewriter, loc, adaptor.getSourceB()), + {convertMFMAVectorOperand(rewriter, loc, adaptor.getSourceA()), + convertMFMAVectorOperand(rewriter, loc, adaptor.getSourceB()), adaptor.getDestC(), createI32Constant(rewriter, loc, op.getCbsz()), createI32Constant(rewriter, loc, op.getAbid()), createI32Constant(rewriter, loc, getBlgpField)}); diff --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp index 194ee9115e3d7a2..29608647d85746a 100644 --- a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp +++ 
b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp @@ -1927,9 +1927,9 @@ static std::optional getMemoryFootprintBytes(Block &block, return opInst->emitError("error obtaining memory region\n"); } - auto it = regions.find(region->memref); - if (it == regions.end()) { - regions[region->memref] = std::move(region); + auto [it, inserted] = regions.try_emplace(region->memref); + if (inserted) { + it->second = std::move(region); } else if (failed(it->second->unionBoundingBox(*region))) { return opInst->emitWarning( "getMemoryFootprintBytes: unable to perform a union on a memory " diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp index bd2164e640e7b89..99871dac81d3269 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp @@ -60,9 +60,9 @@ bool DINodeAttr::classof(Attribute attr) { DIDerivedTypeAttr, DIFileAttr, DIGlobalVariableAttr, DIImportedEntityAttr, DILabelAttr, DILexicalBlockAttr, DILexicalBlockFileAttr, DILocalVariableAttr, DIModuleAttr, - DINamespaceAttr, DINullTypeAttr, DIStringTypeAttr, - DISubprogramAttr, DISubrangeAttr, DISubroutineTypeAttr>( - attr); + DINamespaceAttr, DINullTypeAttr, DIAnnotationAttr, + DIStringTypeAttr, DISubprogramAttr, DISubrangeAttr, + DISubroutineTypeAttr>(attr); } //===----------------------------------------------------------------------===// @@ -221,15 +221,16 @@ DICompositeTypeAttr::getRecSelf(DistinctAttr recId) { //===----------------------------------------------------------------------===// DIRecursiveTypeAttrInterface DISubprogramAttr::withRecId(DistinctAttr recId) { - return DISubprogramAttr::get( - getContext(), recId, getIsRecSelf(), getId(), getCompileUnit(), - getScope(), getName(), getLinkageName(), getFile(), getLine(), - getScopeLine(), getSubprogramFlags(), getType(), getRetainedNodes()); + return DISubprogramAttr::get(getContext(), recId, getIsRecSelf(), getId(), + getCompileUnit(), getScope(), getName(), + getLinkageName(), 
getFile(), getLine(), + getScopeLine(), getSubprogramFlags(), getType(), + getRetainedNodes(), getAnnotations()); } DIRecursiveTypeAttrInterface DISubprogramAttr::getRecSelf(DistinctAttr recId) { return DISubprogramAttr::get(recId.getContext(), recId, /*isRecSelf=*/true, - {}, {}, {}, {}, {}, 0, 0, {}, {}, {}, {}); + {}, {}, {}, {}, {}, 0, 0, {}, {}, {}, {}, {}); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/DIScopeForLLVMFuncOp.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/DIScopeForLLVMFuncOp.cpp index b276e06f93c612a..052e98ea8b8d48e 100644 --- a/mlir/lib/Dialect/LLVMIR/Transforms/DIScopeForLLVMFuncOp.cpp +++ b/mlir/lib/Dialect/LLVMIR/Transforms/DIScopeForLLVMFuncOp.cpp @@ -78,7 +78,7 @@ static void addScopeToFunction(LLVM::LLVMFuncOp llvmFunc, auto subprogramAttr = LLVM::DISubprogramAttr::get( context, id, compileUnitAttr, fileAttr, funcName, funcName, fileAttr, /*line=*/line, /*scopeline=*/col, subprogramFlags, subroutineTypeAttr, - /*retainedNodes=*/{}); + /*retainedNodes=*/{}, /*annotations=*/{}); llvmFunc->setLoc(FusedLoc::get(context, {loc}, subprogramAttr)); } diff --git a/mlir/lib/Dialect/Quant/IR/QuantTypes.cpp b/mlir/lib/Dialect/Quant/IR/QuantTypes.cpp index ac01b37a553077f..7c0d3696486515d 100644 --- a/mlir/lib/Dialect/Quant/IR/QuantTypes.cpp +++ b/mlir/lib/Dialect/Quant/IR/QuantTypes.cpp @@ -322,8 +322,6 @@ LogicalResult UniformQuantizedType::verifyInvariants( // Verify scale. 
double minScale = getMinScale(expressedType); double maxScale = getMaxScale(expressedType); - if (scale <= 0.0 || std::isinf(scale) || std::isnan(scale)) - return emitError() << "illegal scale: " << scale; if (scale < minScale || scale > maxScale) return emitError() << "scale out of expressed type range [" << minScale << ", " << maxScale << "]"; @@ -388,8 +386,6 @@ LogicalResult UniformQuantizedPerAxisType::verifyInvariants( double minScale = getMinScale(expressedType); double maxScale = getMaxScale(expressedType); for (double scale : scales) { - if (scale <= 0.0 || std::isinf(scale) || std::isnan(scale)) - return emitError() << "illegal scale: " << scale; if (scale < minScale || scale > maxScale) return emitError() << "scale out of expressed type range [" << minScale << ", " << maxScale << "]"; diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index d1c9fd2d217dad8..2582d4e0df19202 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -312,25 +312,25 @@ void ConditionOp::getSuccessorRegions( //===----------------------------------------------------------------------===// void ForOp::build(OpBuilder &builder, OperationState &result, Value lb, - Value ub, Value step, ValueRange iterArgs, + Value ub, Value step, ValueRange initArgs, BodyBuilderFn bodyBuilder) { OpBuilder::InsertionGuard guard(builder); result.addOperands({lb, ub, step}); - result.addOperands(iterArgs); - for (Value v : iterArgs) + result.addOperands(initArgs); + for (Value v : initArgs) result.addTypes(v.getType()); Type t = lb.getType(); Region *bodyRegion = result.addRegion(); Block *bodyBlock = builder.createBlock(bodyRegion); bodyBlock->addArgument(t, result.location); - for (Value v : iterArgs) + for (Value v : initArgs) bodyBlock->addArgument(v.getType(), v.getLoc()); // Create the default terminator if the builder is not provided and if the // iteration arguments are not provided. 
Otherwise, leave this to the caller // because we don't know which values to return from the loop. - if (iterArgs.empty() && !bodyBuilder) { + if (initArgs.empty() && !bodyBuilder) { ForOp::ensureTerminator(*bodyRegion, builder, result.location); } else if (bodyBuilder) { OpBuilder::InsertionGuard guard(builder); @@ -3260,23 +3260,23 @@ LogicalResult ReduceReturnOp::verify() { void WhileOp::build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, TypeRange resultTypes, - ValueRange operands, BodyBuilderFn beforeBuilder, + ValueRange inits, BodyBuilderFn beforeBuilder, BodyBuilderFn afterBuilder) { - odsState.addOperands(operands); + odsState.addOperands(inits); odsState.addTypes(resultTypes); OpBuilder::InsertionGuard guard(odsBuilder); // Build before region. SmallVector beforeArgLocs; - beforeArgLocs.reserve(operands.size()); - for (Value operand : operands) { + beforeArgLocs.reserve(inits.size()); + for (Value operand : inits) { beforeArgLocs.push_back(operand.getLoc()); } Region *beforeRegion = odsState.addRegion(); - Block *beforeBlock = odsBuilder.createBlock( - beforeRegion, /*insertPt=*/{}, operands.getTypes(), beforeArgLocs); + Block *beforeBlock = odsBuilder.createBlock(beforeRegion, /*insertPt=*/{}, + inits.getTypes(), beforeArgLocs); if (beforeBuilder) beforeBuilder(odsBuilder, odsState.location, beforeBlock->getArguments()); diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index defac8308b90927..659eabd2e938800 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -4337,11 +4337,16 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) { dest = rewriter.create(loc, newDestType, packOp.getDest()); } - Value newOp = rewriter.create( - loc, source, dest, packOp.getInnerDimsPos(), packOp.getMixedTiles(), - packOp.getPaddingValue(), packOp.getOuterDimsPerm()); + auto clonedPackOp = cast(rewriter.clone(*packOp)); + Value 
res = clonedPackOp.getResult(); + rewriter.startOpModification(clonedPackOp); + clonedPackOp.getSourceMutable().assign(source); + clonedPackOp.getDestMutable().assign(dest); + res.setType(dest.getType()); + rewriter.finalizeOpModification(clonedPackOp); + rewriter.replaceOpWithNewOp( - packOp, packOp.getResult().getType(), newOp); + packOp, packOp.getResult().getType(), clonedPackOp); return success(); } diff --git a/mlir/lib/Target/LLVMIR/DebugImporter.cpp b/mlir/lib/Target/LLVMIR/DebugImporter.cpp index 8c6f32f6bb0cd0d..cd992be62b47197 100644 --- a/mlir/lib/Target/LLVMIR/DebugImporter.cpp +++ b/mlir/lib/Target/LLVMIR/DebugImporter.cpp @@ -245,12 +245,32 @@ DISubprogramAttr DebugImporter::translateImpl(llvm::DISubprogram *node) { if (llvm::is_contained(retainedNodes, nullptr)) retainedNodes.clear(); + SmallVector annotations; + // We currently only support `string` values for annotations on the MLIR side. + // Theoretically we could support other primitives, but LLVM is not using + // other types in practice. 
+ if (llvm::DINodeArray rawAnns = node->getAnnotations(); rawAnns) { + for (size_t i = 0, e = rawAnns->getNumOperands(); i < e; ++i) { + const llvm::MDTuple *tuple = cast(rawAnns->getOperand(i)); + if (tuple->getNumOperands() != 2) + continue; + const llvm::MDString *name = cast(tuple->getOperand(0)); + const llvm::MDString *value = + dyn_cast(tuple->getOperand(1)); + if (name && value) { + annotations.push_back(DIAnnotationAttr::get( + context, StringAttr::get(context, name->getString()), + StringAttr::get(context, value->getString()))); + } + } + } + return DISubprogramAttr::get(context, id, translate(node->getUnit()), scope, getStringAttrOrNull(node->getRawName()), getStringAttrOrNull(node->getRawLinkageName()), translate(node->getFile()), node->getLine(), node->getScopeLine(), *subprogramFlags, type, - retainedNodes); + retainedNodes, annotations); } DISubrangeAttr DebugImporter::translateImpl(llvm::DISubrange *node) { diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp index 8ca3beca6b66f71..92ff079a10c8aa2 100644 --- a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp @@ -102,6 +102,12 @@ DebugTranslation::getMDTupleOrNull(ArrayRef elements) { return nullptr; SmallVector llvmElements = llvm::to_vector( llvm::map_range(elements, [&](DINodeAttr attr) -> llvm::Metadata * { + if (DIAnnotationAttr annAttr = dyn_cast(attr)) { + llvm::Metadata *ops[2] = { + llvm::MDString::get(llvmCtx, annAttr.getName()), + llvm::MDString::get(llvmCtx, annAttr.getValue())}; + return llvm::MDNode::get(llvmCtx, ops); + } return translate(attr); })); return llvm::MDNode::get(llvmCtx, llvmElements); @@ -332,7 +338,8 @@ llvm::DISubprogram *DebugTranslation::translateImpl(DISubprogramAttr attr) { /*ThisAdjustment=*/0, llvm::DINode::FlagZero, static_cast(attr.getSubprogramFlags()), compileUnit, /*TemplateParams=*/nullptr, /*Declaration=*/nullptr, - getMDTupleOrNull(attr.getRetainedNodes())); + 
getMDTupleOrNull(attr.getRetainedNodes()), nullptr, + getMDTupleOrNull(attr.getAnnotations())); if (attr.getId()) distinctAttrToNode.try_emplace(attr.getId(), node); return node; diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp index 144f9f01142a090..d339073771fb5c0 100644 --- a/mlir/lib/Transforms/Mem2Reg.cpp +++ b/mlir/lib/Transforms/Mem2Reg.cpp @@ -285,10 +285,11 @@ LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses( mlir::getForwardSlice(slot.ptr, &forwardSlice); for (Operation *user : forwardSlice) { // If the next operation has no blocking uses, everything is fine. - if (!userToBlockingUses.contains(user)) + auto it = userToBlockingUses.find(user); + if (it == userToBlockingUses.end()) continue; - SmallPtrSet &blockingUses = userToBlockingUses[user]; + SmallPtrSet &blockingUses = it->second; SmallVector newBlockingUses; // If the operation decides it cannot deal with removing the blocking uses, diff --git a/mlir/test/CAPI/llvm.c b/mlir/test/CAPI/llvm.c index 36277122801de4b..12a436ad12fc4c6 100644 --- a/mlir/test/CAPI/llvm.c +++ b/mlir/test/CAPI/llvm.c @@ -324,9 +324,16 @@ static void testDebugInfoAttributes(MlirContext ctx) { mlirAttributeDump(di_imported_entity); // CHECK: #llvm.di_imported_entity<{{.*}}> + MlirAttribute di_annotation = mlirLLVMDIAnnotationAttrGet( + ctx, mlirStringAttrGet(ctx, mlirStringRefCreateFromCString("foo")), + mlirStringAttrGet(ctx, mlirStringRefCreateFromCString("bar"))); + + mlirAttributeDump(di_annotation); + // CHECK: #llvm.di_annotation<{{.*}}> + MlirAttribute di_subprogram = mlirLLVMDISubprogramAttrGet( ctx, recId0, false, id, compile_unit, compile_unit, foo, bar, file, 1, 2, - 0, subroutine_type, 1, &di_imported_entity); + 0, subroutine_type, 1, &di_imported_entity, 1, &di_annotation); // CHECK: #llvm.di_subprogram<{{.*}}> mlirAttributeDump(di_subprogram); diff --git a/mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir b/mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir index 
7ef9d172d52cd1d..f8a60d37801ebec 100644 --- a/mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir +++ b/mlir/test/Conversion/AMDGPUToROCDL/mfma.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx940 | FileCheck %s +// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx940 -cse | FileCheck %s func.func @mfma_to_rocdl(%arg0 : f32, %arg1 : vector<32xf32>, %arg2 : vector<16xf32>, %arg3 : vector<4xf32>, %arg4 : vector<4xf16>, %arg5 : vector<4xi8>, @@ -28,7 +28,8 @@ func.func @mfma_to_rocdl(%arg0 : f32, %arg1 : vector<32xf32>, amdgpu.mfma %arg4 * %arg4 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 8 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<16xf32> // CHECK: rocdl.mfma.f32.16x16x16f16{{.*}}: (vector<4xf16>, vector<4xf16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> amdgpu.mfma %arg4 * %arg4 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - // CHECK: rocdl.mfma.i32.32x32x4i8{{.*}}: (i32, i32, vector<32xi32>, i32, i32, i32) -> vector<32xi32> + // CHECK: %[[BITCAST_4xi8_i32:.+]] = llvm.bitcast {{.*}} : vector<4xi8> to i32 + // CHECK: rocdl.mfma.i32.32x32x4i8 %[[BITCAST_4xi8_i32]], %[[BITCAST_4xi8_i32]], {{.*}}: (i32, i32, vector<32xi32>, i32, i32, i32) -> vector<32xi32> amdgpu.mfma %arg5 * %arg5 + %arg6 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 32 : i32, n = 32 : i32, blocks = 2 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<32xi32> // CHECK: rocdl.mfma.i32.16x16x4i8{{.*}}: (i32, i32, vector<16xi32>, i32, i32, i32) -> vector<16xi32> amdgpu.mfma %arg5 * %arg5 + %arg7 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 16 : i32, n = 16 : i32, blocks = 4 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<16xi32> @@ -38,7 +39,8 @@ func.func @mfma_to_rocdl(%arg0 : f32, %arg1 : vector<32xf32>, amdgpu.mfma %arg5 * %arg5 + %arg7 { abid = 0 : i32, cbsz = 0 : 
i32, k = 8 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<16xi32> // CHECK: rocdl.mfma.i32.16x16x16i8{{.*}}: (i32, i32, vector<4xi32>, i32, i32, i32) -> vector<4xi32> amdgpu.mfma %arg5 * %arg5 + %arg8 { abid = 0 : i32, cbsz = 0 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<4xi8>, vector<4xi8>, vector<4xi32> - // CHECK: rocdl.mfma.f32.32x32x2bf16{{.*}}: (vector<2xi16>, vector<2xi16>, vector<32xf32>, i32, i32, i32) -> vector<32xf32> + // CHECK: %[[BITCAST_2xbf16_2xi16:.+]] = llvm.bitcast {{.*}} : vector<2xbf16> to vector<2xi16> + // CHECK: rocdl.mfma.f32.32x32x2bf16 %[[BITCAST_2xbf16_2xi16]], %[[BITCAST_2xbf16_2xi16]], %{{.*}}: (vector<2xi16>, vector<2xi16>, vector<32xf32>, i32, i32, i32) -> vector<32xf32> amdgpu.mfma %arg9 * %arg9 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 2 : i32, m = 32 : i32, n = 32 : i32, blocks = 2 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<32xf32> // CHECK: rocdl.mfma.f32.16x16x2bf16{{.*}}: (vector<2xi16>, vector<2xi16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> amdgpu.mfma %arg9 * %arg9 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 2 : i32, m = 16 : i32, n = 16 : i32, blocks = 4 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<16xf32> @@ -48,7 +50,8 @@ func.func @mfma_to_rocdl(%arg0 : f32, %arg1 : vector<32xf32>, amdgpu.mfma %arg9 * %arg9 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<16xf32> // CHECK: rocdl.mfma.f32.16x16x8bf16{{.*}}: (vector<2xi16>, vector<2xi16>, vector<4xf32>, i32, i32, i32) -> vector<4xf32> amdgpu.mfma %arg9 * %arg9 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 8 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<2xbf16>, vector<2xbf16>, vector<4xf32> - // CHECK: rocdl.mfma.f32.32x32x4bf16.1k{{.*}}: (vector<4xi16>, vector<4xi16>, vector<32xf32>, i32, i32, 
i32) -> vector<32xf32> + // CHECK: %[[BITCAST_4xbf16_4xi16:.+]] = llvm.bitcast {{.*}} : vector<4xbf16> to vector<4xi16> + // CHECK: rocdl.mfma.f32.32x32x4bf16.1k %[[BITCAST_4xbf16_4xi16]], %[[BITCAST_4xbf16_4xi16]], {{.*}}: (vector<4xi16>, vector<4xi16>, vector<32xf32>, i32, i32, i32) -> vector<32xf32> amdgpu.mfma %arg10 * %arg10 + %arg1 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 32 : i32, n = 32 : i32, blocks = 2 : i32 } blgp = none : vector<4xbf16>, vector<4xbf16>, vector<32xf32> // CHECK: rocdl.mfma.f32.16x16x4bf16.1k{{.*}}: (vector<4xi16>, vector<4xi16>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> amdgpu.mfma %arg10 * %arg10 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 16 : i32, n = 16 : i32, blocks = 4 : i32 } blgp = none : vector<4xbf16>, vector<4xbf16>, vector<16xf32> @@ -62,7 +65,8 @@ func.func @mfma_to_rocdl(%arg0 : f32, %arg1 : vector<32xf32>, amdgpu.mfma %arg11 * %arg11 + %arg12 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : f64, f64, vector<4xf64> // CHECK: rocdl.mfma.f64.4x4x4f64{{.*}}: (f64, f64, f64, i32, i32, i32) -> f64 amdgpu.mfma %arg11 * %arg11 + %arg11 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 4 : i32, n = 4 : i32, blocks = 4 : i32 } blgp = none : f64, f64, f64 - // CHECK: rocdl.mfma.i32.16x16x32.i8{{.*}}: (i64, i64, vector<4xi32>, i32, i32, i32) -> vector<4xi32> + // CHECK: %[[BITCAST_8xi8_i64:.+]] = llvm.bitcast {{.*}} : vector<8xi8> to i64 + // CHECK: rocdl.mfma.i32.16x16x32.i8 %[[BITCAST_8xi8_i64]], %[[BITCAST_8xi8_i64]], {{.*}}: (i64, i64, vector<4xi32>, i32, i32, i32) -> vector<4xi32> amdgpu.mfma %arg13 * %arg13 + %arg8 { abid = 0 : i32, cbsz = 0 : i32, k = 32 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<8xi8>, vector<8xi8>, vector<4xi32> // CHECK: rocdl.mfma.i32.32x32x16.i8{{.*}}: (i64, i64, vector<16xi32>, i32, i32, i32) -> vector<16xi32> amdgpu.mfma %arg13 * %arg13 + %arg7 { abid = 0 : i32, cbsz = 0 : i32, k = 
16 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32 } blgp = none : vector<8xi8>, vector<8xi8>, vector<16xi32> @@ -70,9 +74,11 @@ func.func @mfma_to_rocdl(%arg0 : f32, %arg1 : vector<32xf32>, amdgpu.mfma %arg14 * %arg14 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 8 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32, reducePrecision } blgp = none : vector<2xf32>, vector<2xf32>, vector<4xf32> // CHECK: rocdl.mfma.f32.32x32x4.xf32{{.*}}: (vector<2xf32>, vector<2xf32>, vector<16xf32>, i32, i32, i32) -> vector<16xf32> amdgpu.mfma %arg14 * %arg14 + %arg2 { abid = 0 : i32, cbsz = 0 : i32, k = 4 : i32, m = 32 : i32, n = 32 : i32, blocks = 1 : i32, reducePrecision } blgp = none : vector<2xf32>, vector<2xf32>, vector<16xf32> - // CHECK: rocdl.mfma.f32.16x16x32.bf8.bf8{{.*}}: (i64, i64, vector<4xf32>, i32, i32, i32) -> vector<4xf32> + // CHECK: %[[BITCAST_8xi8_i64_1:.+]] = llvm.bitcast {{.*}} : vector<8xi8> to i64 + // CHECK: rocdl.mfma.f32.16x16x32.bf8.bf8 %[[BITCAST_8xi8_i64_1]], %[[BITCAST_8xi8_i64_1]], {{.*}}: (i64, i64, vector<4xf32>, i32, i32, i32) -> vector<4xf32> amdgpu.mfma %arg15 * %arg15 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 32 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<8xf8E5M2FNUZ>, vector<8xf8E5M2FNUZ>, vector<4xf32> - // CHECK: rocdl.mfma.f32.16x16x32.bf8.fp8{{.*}}: (i64, i64, vector<4xf32>, i32, i32, i32) -> vector<4xf32> + // CHECK: %[[BITCAST_8xi8_i64_2:.+]] = llvm.bitcast {{.*}} : vector<8xi8> to i64 + // CHECK: rocdl.mfma.f32.16x16x32.bf8.fp8 %[[BITCAST_8xi8_i64_1]], %[[BITCAST_8xi8_i64_2]], {{.*}}: (i64, i64, vector<4xf32>, i32, i32, i32) -> vector<4xf32> amdgpu.mfma %arg15 * %arg16 + %arg3 { abid = 0 : i32, cbsz = 0 : i32, k = 32 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<8xf8E5M2FNUZ>, vector<8xf8E4M3FNUZ>, vector<4xf32> // CHECK: rocdl.mfma.f32.16x16x32.fp8.bf8{{.*}}: (i64, i64, vector<4xf32>, i32, i32, i32) -> vector<4xf32> amdgpu.mfma %arg16 * %arg15 + %arg3 { abid = 0 : 
i32, cbsz = 0 : i32, k = 32 : i32, m = 16 : i32, n = 16 : i32, blocks = 1 : i32 } blgp = none : vector<8xf8E4M3FNUZ>, vector<8xf8E5M2FNUZ>, vector<4xf32> diff --git a/mlir/test/Dialect/LLVMIR/debuginfo.mlir b/mlir/test/Dialect/LLVMIR/debuginfo.mlir index 6d9b7799c3917f9..af95ec97833a135 100644 --- a/mlir/test/Dialect/LLVMIR/debuginfo.mlir +++ b/mlir/test/Dialect/LLVMIR/debuginfo.mlir @@ -116,10 +116,11 @@ apinotes = "/", line = 42, isDecl = true > -// CHECK-DAG: #[[SP2:.*]] = #llvm.di_subprogram +// CHECK-DAG: #[[SP2:.*]] = #llvm.di_subprogram #sp2 = #llvm.di_subprogram< compileUnit = #cu, scope = #module, name = "value", - file = #file, subprogramFlags = "Definition", type = #spType2 + file = #file, subprogramFlags = "Definition", type = #spType2, + annotations = #llvm.di_annotation > // CHECK-DAG: #[[BLOCK0:.*]] = #llvm.di_lexical_block diff --git a/mlir/test/Dialect/Quant/parse-uniform-invalid.mlir b/mlir/test/Dialect/Quant/parse-uniform-invalid.mlir index 7613a344cf2b8f7..4528d2826a850c8 100644 --- a/mlir/test/Dialect/Quant/parse-uniform-invalid.mlir +++ b/mlir/test/Dialect/Quant/parse-uniform-invalid.mlir @@ -107,7 +107,7 @@ // ----- // Illegal scale: negative -// expected-error@+1 {{illegal scale: -1.000000}} +// expected-error@+1 {{scale out of expressed type range}} !qalias = !quant.uniform:f32, -1.0:127> // ----- diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index 86754c1c37536d8..0aa2d33ef17ed46 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -2357,7 +2357,7 @@ func.func @unpack_pack_with_padding_no_canonicalization(%t: tensor<256x512xbf16> %tensor_empty = tensor.empty() : tensor<4x16x64x32xbf16> %tensor_empty1 = tensor.empty() : tensor<224x512xbf16> %packed = tensor.pack %t outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty : tensor<256x512xbf16> -> tensor<4x16x64x32xbf16> - %unpacked = tensor.unpack 
%packed inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty1 : tensor<4x16x64x32xbf16> -> tensor<224x512xbf16> + %unpacked = tensor.unpack %packed inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %tensor_empty1 : tensor<4x16x64x32xbf16> -> tensor<224x512xbf16> return %unpacked : tensor<224x512xbf16> } @@ -2707,3 +2707,14 @@ func.func @test_destination_multiple_result(%arg0: tensor<2x2xf32>, %arg1: tenso %0:2 = test.destination_style_op ins(%cast : tensor) outs(%cast_0 : tensor) -> tensor, index return %0#1 : index } + +// ----- + +// CHECK-LABEL: func.func @pack_dont_drop_attributes( +// CHECK: tensor.pack {{.*}} {test_attr} +func.func @pack_dont_drop_attributes(%arg0: tensor, %arg1: tensor<128x?x100x16x1xf16>) -> tensor<128x?x100x16x1xf16> { + %c32_i64 = arith.constant 32 : i64 + %cst = arith.constant 0.000000e+00 : f16 + %pack = tensor.pack %arg0 padding_value(%cst : f16) outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 1] into %arg1 {test_attr} : tensor -> tensor<128x?x100x16x1xf16> + return %pack : tensor<128x?x100x16x1xf16> +} diff --git a/mlir/test/Target/LLVMIR/Import/debug-info.ll b/mlir/test/Target/LLVMIR/Import/debug-info.ll index 02e35ae7f0ee9df..6267990b0bf803e 100644 --- a/mlir/test/Target/LLVMIR/Import/debug-info.ll +++ b/mlir/test/Target/LLVMIR/Import/debug-info.ll @@ -816,3 +816,30 @@ define void @imp_fn() !dbg !12 { ; CHECK-DAG: #[[SP_REC:.+]] = #llvm.di_subprogram, isRecSelf = true> ; CHECK-DAG: #[[IE:.+]] = #llvm.di_imported_entity ; CHECK-DAG: #[[SP:.+]] = #llvm.di_subprogram<{{.*}}name = "imp_fn"{{.*}}retainedNodes = #[[IE]]> + +; // ----- + +; Test that annotations are handled correctly + +; CHECK-LABEL: @fn_with_annotations + +define void @fn_with_annotations() !dbg !12 { + ret void +} + +!llvm.module.flags = !{!10} +!llvm.dbg.cu = !{!4} + +!2 = !DIModule(scope: !4, name: "mod1", file: !3, line: 1) +!3 = !DIFile(filename: "test.f90", directory: "") +!4 = distinct !DICompileUnit(language: 
DW_LANG_Fortran95, file: !3) +!8 = !DIModule(scope: !4, name: "mod1", file: !3, line: 5) +!10 = !{i32 2, !"Debug Info Version", i32 3} +!12 = distinct !DISubprogram(name: "fn_with_annotations", linkageName: "fn_with_annotations", scope: !3, file: !3, line: 10, type: !14, scopeLine: 10, spFlags: DISPFlagDefinition, unit: !4, annotations: !16) +!14 = !DISubroutineType(cc: DW_CC_program, types: !15) +!15 = !{} +!16 = !{!17} +!17 = !{!"foo", !"bar"} + + +; CHECK-DAG: #llvm.di_subprogram<{{.*}}name = "fn_with_annotations"{{.*}}annotations = #llvm.di_annotation> diff --git a/mlir/test/Target/LLVMIR/llvmir-debug.mlir b/mlir/test/Target/LLVMIR/llvmir-debug.mlir index 01194df50477423..b09a60b8dcac90a 100644 --- a/mlir/test/Target/LLVMIR/llvmir-debug.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-debug.mlir @@ -89,7 +89,8 @@ llvm.func @func_no_debug() { #spType1 = #llvm.di_subroutine_type #sp1 = #llvm.di_subprogram< compileUnit = #cu, scope = #module, name = "empty_types", - file = #file, subprogramFlags = "Definition", type = #spType1 + file = #file, subprogramFlags = "Definition", type = #spType1, + annotations = #llvm.di_annotation > // CHECK-LABEL: define void @func_with_debug( @@ -177,11 +178,14 @@ llvm.func @empty_types() { // CHECK: ![[CALLEE_ARGS]] = !{![[ARG_TYPE:.*]], ![[ARG_TYPE:.*]]} // CHECK: ![[INLINE_LOC]] = !DILocation(line: 28, column: 5, -// CHECK: ![[EMPTY_TYPES_LOC]] = distinct !DISubprogram(name: "empty_types", scope: ![[MODULE:.*]], file: ![[CU_FILE_LOC]], type: ![[EMPTY_TYPES_TYPE:.*]], spFlags: DISPFlagDefinition +// CHECK: ![[EMPTY_TYPES_LOC]] = distinct !DISubprogram(name: "empty_types", scope: ![[MODULE:.*]], file: ![[CU_FILE_LOC]], type: ![[EMPTY_TYPES_TYPE:.*]], spFlags: DISPFlagDefinition, unit: ![[CU_LOC]], annotations: ![[ANNOTATIONS:.*]]) // CHECK: ![[MODULE]] = !DIModule(scope: ![[CU_FILE_LOC]], name: "module", configMacros: "bar", includePath: "/", apinotes: "/", file: ![[CU_FILE_LOC]], line: 42, isDecl: true) // CHECK: ![[EMPTY_TYPES_TYPE]] = 
!DISubroutineType(cc: DW_CC_normal, types: ![[EMPTY_TYPES_ARGS:.*]]) // CHECK: ![[EMPTY_TYPES_ARGS]] = !{} +// CHECK: ![[ANNOTATIONS]] = !{![[ANNOTATION:.*]]} +// CHECK: ![[ANNOTATION]] = !{!"foo", !"bar"} + // ----- #di_file = #llvm.di_file<"foo.mlir" in "/test/"> diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 2dd54b5116d9204..439cc20963a1298 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -212,6 +212,15 @@ if(OPENMP_MSVC_NAME_SCHEME) LINK_FLAGS "${LIBOMP_CONFIGURED_LDFLAGS}" LINKER_LANGUAGE ${LIBOMP_LINKER_LANGUAGE} ) +elseif(${CMAKE_SYSTEM_NAME} MATCHES "AIX") + set(LIBOMP_SHARED_OUTPUT_NAME "omp" CACHE STRING "Output name for the shared libomp runtime library.") + set_target_properties(omp PROPERTIES + OUTPUT_NAME "${LIBOMP_SHARED_OUTPUT_NAME}" + LINK_FLAGS "${LIBOMP_CONFIGURED_LDFLAGS}" + LINKER_LANGUAGE ${LIBOMP_LINKER_LANGUAGE} + VERSION "1.0" + SOVERSION "1" + ) else() set_target_properties(omp PROPERTIES PREFIX "" SUFFIX "" OUTPUT_NAME "${LIBOMP_LIB_FILE}" @@ -426,11 +435,7 @@ if(WIN32) endforeach() else() - if(${CMAKE_SYSTEM_NAME} MATCHES "AIX") - install(FILES ${LIBOMP_LIBRARY_DIR}/libomp.a DESTINATION "${OPENMP_INSTALL_LIBDIR}" COMPONENT runtime) - else() - install(TARGETS omp ${export_to_llvmexports} ${LIBOMP_INSTALL_KIND} DESTINATION "${OPENMP_INSTALL_LIBDIR}") - endif() + install(TARGETS omp ${export_to_llvmexports} ${LIBOMP_INSTALL_KIND} DESTINATION "${OPENMP_INSTALL_LIBDIR}") if(${LIBOMP_INSTALL_ALIASES}) # Create aliases (symlinks) of the library for backwards compatibility